diff --git a/icu4j/src/com/ibm/icu/text/CollationElementIterator.java b/icu4j/src/com/ibm/icu/text/CollationElementIterator.java index 0ac52f78ed7..23a856708b5 100755 --- a/icu4j/src/com/ibm/icu/text/CollationElementIterator.java +++ b/icu4j/src/com/ibm/icu/text/CollationElementIterator.java @@ -6,53 +6,77 @@ import com.ibm.icu.impl.NormalizerImpl; import com.ibm.icu.impl.UCharacterProperty; /** - *
The CollationElementIterator
class is used as an iterator
- * to walk through each character of an international string. Use the iterator
- * to return the ordering priority of the positioned character. The ordering
- * priority of a character, which we refer to as a key, defines how a
- * character is collated in the given collation object.
For example, consider the following in Spanish: + *
+ * The CollationElementIterator
object is an iterator created
+ * by a RuleBasedCollator to walk through an international string. The return
+ * result of each iteration is a 32 bit collation element that defines the
+ * ordering priority of the next sequence of characters in the source string.
+ *
For better illustration, consider the following in Spanish: *
** And in German, *- * "ca" -> the first key is key('c') and second key is key('a'). - * "cha" -> the first key is key('ch') and second key is key('a'). + * "ca" -> the first collation element is collation_element('c') and second + * collation element is collation_element('a'). + * + * Since "ch" in Spanish sorts as one entity, the below example returns one + * collation element for the 2 characters 'c' and 'h' + * + * "cha" -> the first collation element is collation_element('ch') and second + * collation element is collation_element('a'). **
** - *- * "\u00e4b"-> the first key is key('a'), the second key is key('e'), and - * the third key is key('b'). + * Since the character 'æ' is a composed character of 'a' and 'e', the + * below example returns 2 collation elements for the single character + * 'æ' + * + * "æb" -> the first collation element is collation_element('a'), the + * second collation element is collation_element('e'), and the + * third collation element is collation_element('b'). **
The key of a character is an integer composed of primary order(short),
- * secondary order(byte), and tertiary order(byte). Java strictly defines
- * the size and signedness of its primitive data types. Therefore, the static
- * functions primaryOrder
, secondaryOrder
, and
- * tertiaryOrder
return int
, short
,
- * and short
respectively to ensure the correctness of the key
- * value.
- * Example of the iterator usage, + * For collation ordering comparison, the collation element results can not be + * compared simply by using basic arithmetric operators, e.g. <, == or >, + * further processing has to be done. Details can be found in the ICU + * + * user guide. An example of using the CollationElementIterator for + * collation ordering comparison is the class + * com.ibm.icu.text.StringSearch. + *
+ *+ * To construct a CollationElementIterator object, users would have to call the + * factory method getCollationElementIterator() in a RuleBasedCollator object + * that defines the sorting order that is desired. + *
+ *+ * Example: *
** ** String testString = "This is a test"; - * RuleBasedCollator ruleBasedCollator = (RuleBasedCollator)Collator.getInstance(); - * CollationElementIterator collationElementIterator = ruleBasedCollator.getCollationElementIterator(testString); - * int primaryOrder = CollationElementIterator.primaryOrder(collationElementIterator.next()); + * RuleBasedCollator rbc = new RuleBasedCollator("&a<b"); + * CollationElementIterator collationElementIterator = rbc.getCollationElementIterator(testString); + * int primaryOrder = CollationElementIterator.IGNORABLE; + * while (primaryOrder != CollationElementIterator.NULLORDER) { + * int order = collationElementIterator.next(); + * if (order != CollationElementIterator.IGNORABLE && + * order != CollationElementIterator.NULLORDER) { + * // order is valid, not ignorable and we have not passed the end + * // of the iteration, we do something + * primaryOrder = CollationElementIterator.primaryOrder(order); + * System.out.println("Next primary order 0x" + Integer.toHexString(primaryOrder)); + * } + * } **
- * CollationElementIterator.next
returns the collation order
- * of the next character. A collation order consists of primary order,
- * secondary order and tertiary order. The data type of the collation
- * order is int. The first 16 bits of a collation order
- * is its primary order; the next 8 bits is the secondary order and the
- * last 8 bits is the tertiary order.
This constant is returned by the iterator in the methods next() and + * previous() when the end or the beginning of the source string has been + * reached, and there are no more valid collation elements to return.
+ *See class documentation for an example of use.
* @draft 2.2 + * @see #next + * @see #previous */ public final static int NULLORDER = 0xffffffff; /** - * Ignorable collation element order. + *This constant is returned by the iterator in the methods next() and + * previous() when a collation element result is to be ignored.
+ *See class documentation for an example of use.
+ * @draft 2.2 + * @see #next + * @see #previous */ public static final int IGNORABLE = 0; @@ -76,24 +110,25 @@ public final class CollationElementIterator // public getters ------------------------------------------------------- /** - *Returns the character offset in the original text corresponding to - * the next collation element. (That is, getOffset() returns the position - * in the text corresponding to the collation element that will be - * returned by the next call to next().) This value could be either + *
Returns the character offset in the source string corresponding to + * the next collation element. i.e. getOffset() returns the position + * in source string corresponding to the collation element that will be + * returned by the next call to next(). This value could be either *
setOffset(offset)
* sets the index in the middle of a contraction, getOffset()
* returns the index of the first character in the contraction, which
- * may not be equals to offset.
- * getOffset()
may return the
+ * may not be equals to the original offset that was set. Hence calling
+ * getOffset() immediately after setOffset(offset) does not guarantee that
+ * the original offset set will be returned.
+ * getOffset()
may return the
* index of the immediate subsequent character, or composite
* character with the first character, having a combining class of 0.
+ * Note calling getOffset() immediately after setOffset(offset) may not - * return the value offset.
- * @return The character offset in the original text corresponding to the + * @return The character offset in the source string corresponding to the * collation element that will be returned by the next call to * next(). * @draft 2.2 @@ -111,8 +146,11 @@ public final class CollationElementIterator /** - * Return the maximum length of any expansion sequences that end with the - * specified collation element. + *+ * Returns the maximum length of any expansion sequence that ends with + * the argument collation element ce. If there is no expansion with the + * argument ce as the last element, 1 is returned. + *
* @param ce a collation element returned by previous() or next(). * @return the maximum length of any expansion sequences ending * with the specified collation element. @@ -122,9 +160,11 @@ public final class CollationElementIterator { int start = 0; int limit = m_collator_.m_expansionEndCE_.length; + long unsignedce = ce & 0xFFFFFFFFl; while (start < limit - 1) { int mid = start + ((limit - start) >> 1); - if (ce <= m_collator_.m_expansionEndCE_[mid]) { + long midce = m_collator_.m_expansionEndCE_[mid] & 0xFFFFFFFFl; + if (unsignedce <= midce) { limit = mid; } else { @@ -135,7 +175,8 @@ public final class CollationElementIterator if (m_collator_.m_expansionEndCE_[start] == ce) { result = m_collator_.m_expansionEndCEMaxSize_[start]; } - else if (m_collator_.m_expansionEndCE_[limit] == ce) { + else if (limit < m_collator_.m_expansionEndCE_.length && + m_collator_.m_expansionEndCE_[limit] == ce) { result = m_collator_.m_expansionEndCEMaxSize_[limit]; } else if ((ce & 0xFFFF) == 0x00C0) { @@ -147,34 +188,49 @@ public final class CollationElementIterator // public other methods ------------------------------------------------- /** - *Resets the cursor to the beginning of the string. The next call - * to next() will return the first collation element in the string.
+ *+ * Resets the cursor to the beginning of the string. The next call + * to next() and previous will return the first and last collation element + * in the string respectively. + *
+ *+ * If the RuleBasedCollator used in this iterator has its attributes + * changed, calling reset() will reinitialize the iterator to use the new + * RuleBasedCollator attributes. + *
* @draft 2.2 */ - public synchronized void reset() + public void reset() { - m_source_.setIndex(0); + m_source_.setIndex(m_source_.getBeginIndex()); updateInternalState(); } /** - *Get the next collation element in the string.
- *This iterator iterates over a sequence of collation elements that - * were built from the string. Because there isn't necessarily a - * one-to-one mapping from characters to collation elements, this doesn't - * mean the same thing as "return the collation element [or ordering - * priority] of the next character in the string".
- *This function returns the collation element that the iterator is + *
+ * Get the next collation element in the source string. + *
+ *+ * This iterator iterates over a sequence of collation elements that were + * built from the string. Because there isn't necessarily a one-to-one + * mapping from characters to collation elements, this doesn't mean the + * same thing as "return the collation element [or ordering priority] of + * the next character in the string". + *
+ *+ * This function returns the collation element that the iterator is * currently pointing to and then updates the internal pointer to point to * the next element. previous() updates the pointer first and then * returns the element. This means that when you change direction while * iterating (i.e., call next() and then call previous(), or call * previous() and then call next()), you'll get back the same element - * twice.
- * @return the next collation element + * twice. + * + * @return the next collation element or NULLORDER if the end of the + * iteration has been reached. * @draft 2.2 */ - public synchronized int next() + public int next() { m_isForwards_ = true; if (m_CEBufferSize_ > 0) { @@ -230,24 +286,30 @@ public final class CollationElementIterator } /** - *Get the previous collation element in the string.
- *This iterator iterates over a sequence of collation elements that + *
+ * Get the previous collation element in the source string. + *
+ *+ * This iterator iterates over a sequence of collation elements that * were built from the string. Because there isn't necessarily a * one-to-one mapping from characters to collation elements, this doesn't * mean the same thing as "return the collation element [or ordering - * priority] of the previous character in the string".
- *This function updates the iterator's internal pointer to point to + * priority] of the previous character in the string". + *
+ *+ * This function updates the iterator's internal pointer to point to * the collation element preceding the one it's currently pointing to and * then returns that element, while next() returns the current element and * then updates the pointer. This means that when you change direction * while iterating (i.e., call next() and then call previous(), or call * previous() and then call next()), you'll get back the same element - * twice.
+ * twice. + * * @return the previous collation element, or NULLORDER when the start of - * the iteration has been reached. + * the iteration has been reached. * @draft 2.2 */ - public synchronized int previous() + public int previous() { if (m_source_.getIndex() <= 0 && m_isForwards_) { // if iterator is new or reset, we can immediate perform backwards @@ -317,50 +379,66 @@ public final class CollationElementIterator } /** - * Return the primary strength of a collation element. + * Return the primary order of a collation element ce. + * i.e. the first 16 bits of the argument ce. * @param ce the collation element - * @return the element's primary strength + * @return the element's 16 bits primary order. * @draft 2.2 */ public final static int primaryOrder(int ce) { - return (ce & RuleBasedCollator.CE_PRIMARY_MASK_) >> CE_PRIMARY_SHIFT_; + return (ce & RuleBasedCollator.CE_PRIMARY_MASK_) + >>> RuleBasedCollator.CE_PRIMARY_SHIFT_; } /** - * Return the secondary strength of a collation element. + * Return the secondary order of a collation element ce. + * i.e. the 16th to 27th inclusive set of bits in the argument ce. * @param ce the collation element - * @return the element's secondary strength + * @return the element's 8 bits secondary order * @draft 2.2 */ - public final static short secondaryOrder(int ce) + public final static int secondaryOrder(int ce) { - return (short)((ce & RuleBasedCollator.CE_SECONDARY_MASK_) - >> CE_SECONDARY_SHIFT_); + return (ce & RuleBasedCollator.CE_SECONDARY_MASK_) + >> RuleBasedCollator.CE_SECONDARY_SHIFT_; } /** - * Return the tertiary strength of a collation element. - * @param colelem the collation element - * @return the element's tertiary strength + * Return the tertiary order of a collation element ce. i.e. the last + * 8 bits in the argument ce. 
+ * @param ce the collation element + * @return the element's 8 bits tertiary order * @draft 2.2 */ - public final static short tertiaryOrder(int ce) + public final static int tertiaryOrder(int ce) { - return (short)(ce & RuleBasedCollator.CE_TERTIARY_MASK_); + return ce & RuleBasedCollator.CE_TERTIARY_MASK_; } /** - *Sets the iterator to point to the collation element corresponding to - * the specified character (the parameter is a CHARACTER offset in the - * original string, not an offset into its corresponding sequence of - * collation elements). The value returned by the next call to next() - * will be the collation element corresponding to the specified position - * in the text. If that position is in the middle of a contracting - * character sequence, the result of the next call to next() is the - * collation element for that sequence. This means that getOffset() - * is not guaranteed to return the same value as was passed to a preceding - * call to setOffset().
- * @param offset new character offset into the original text to set. + *+ * Sets the iterator to point to the collation element corresponding to + * the specified character argument offset. The value returned by the next + * call to next() will be the collation element corresponding to the + * characters at argument offset. + *
+ *+ * If argument offset is in the middle of a contracting character sequence, + * the iterator is adjusted to the start of the contracting sequence. This + * means that getOffset() is not guaranteed to return the same value as + * the argument offset. + *
+ *+ * If the decomposition mode is on and argument offset is in the middle of + * a decomposible range of source text, the iterator may not render a + * correct result for + * the next forwards or backwards iteration. User has to ensure that the + * argument offset does not violate the mid of a decomposible range in + * source text. + *
+ * @param offset character offset into the original source string to + * set. Note this argument is not an offset into the corresponding + * sequence of collation elements * @draft 2.2 */ public void setOffset(int offset) @@ -388,7 +466,7 @@ public final class CollationElementIterator } updateInternalState(); int prevoffset = 0; - while (m_source_.getIndex() < offset) { + while (m_source_.getIndex() <= offset) { prevoffset = m_source_.getIndex(); next(); } @@ -399,59 +477,36 @@ public final class CollationElementIterator } /** - *Set a new string over which to iterate.
- *Iteration will start from the start of source.
- * @param source the new source text. + *+ * Sets a new source string for iteration and restart the iteration to + * start from the beginning of the argument source. + *
+ * @param source the new source string for iteration. * @draft 2.2 */ - public synchronized void setText(String source) + public void setText(String source) { m_source_ = new StringCharacterIterator(source); updateInternalState(); } /** - *Set a new string iterator over which to iterate.
- *Iteration will start from the start of source.
- * @param source the new source text. + *+ * Sets a new source string iterator for iteration and restart the + * iteration to start from the beginning of the argument source. + *
+ * @param source the new source string iterator for iteration. * @draft 2.2 */ - public synchronized void setText(CharacterIterator source) + public void setText(CharacterIterator source) { m_source_ = source; - m_source_.setIndex(0); + m_source_.setIndex(m_source_.getBeginIndex()); updateInternalState(); } // public miscellaneous methods ----------------------------------------- - // protected data members ----------------------------------------------- - - /** - * true if current codepoint was Hiragana - */ - protected boolean m_isCodePointHiragana_; - /** - * Position in the original string that starts with a non-FCD sequence - */ - protected int m_FCDStart_; - /** - * This is the CE from CEs buffer that should be returned. - * Initial value is 0. - * Forwards iteration will end with m_CEBufferOffset_ == m_CEBufferSize_, - * backwards will end with m_CEBufferOffset_ == 0. - * The next/previous after we reach the end/beginning of the m_CEBuffer_ - * will cause this value to be reset to 0. - */ - protected int m_CEBufferOffset_; - /** - * This is the position to which we have stored processed CEs. - * Initial value is 0. - * The next/previous after we reach the end/beginning of the m_CEBuffer_ - * will cause this value to be reset to 0. - */ - protected int m_CEBufferSize_; - // protected constructors ----------------------------------------------- /** @@ -493,29 +548,95 @@ public final class CollationElementIterator updateInternalState(); } - // protected methods ---------------------------------------------------- + // package private data members ----------------------------------------- + + /** + * true if current codepoint was Hiragana + */ + boolean m_isCodePointHiragana_; + /** + * Position in the original string that starts with a non-FCD sequence + */ + int m_FCDStart_; + /** + * This is the CE from CEs buffer that should be returned. + * Initial value is 0. 
+ * Forwards iteration will end with m_CEBufferOffset_ == m_CEBufferSize_, + * backwards will end with m_CEBufferOffset_ == 0. + * The next/previous after we reach the end/beginning of the m_CEBuffer_ + * will cause this value to be reset to 0. + */ + int m_CEBufferOffset_; + /** + * This is the position to which we have stored processed CEs. + * Initial value is 0. + * The next/previous after we reach the end/beginning of the m_CEBuffer_ + * will cause this value to be reset to 0. + */ + int m_CEBufferSize_; - /** - * Checks if iterator is in the buffer zone - * @return true if iterator is in buffer zone, false otherwise - */ - protected boolean isInBuffer() - { - return m_bufferOffset_ != -1; - } + // package private methods ---------------------------------------------- /** * Sets the collator used. * Internal use, all data members will be reset to the default values * @param collator to set */ - protected void setCollator(RuleBasedCollator collator) + void setCollator(RuleBasedCollator collator) { m_collator_ = collator; updateInternalState(); } - // private data members ------------------------------------------------- + /** + *Sets the iterator to point to the collation element corresponding to + * the specified character (the parameter is a CHARACTER offset in the + * original string, not an offset into its corresponding sequence of + * collation elements). The value returned by the next call to next() + * will be the collation element corresponding to the specified position + * in the text. Unlike the public method setOffset(int), this method does + * not try to readjust the offset to the start of a contracting sequence. + * getOffset() is guaranteed to return the same value as was passed to a + * preceding call to setOffset().
+ * @param offset new character offset into the original text to set. + * @draft 2.2 + */ + void setExactOffset(int offset) + { + m_source_.setIndex(offset); + updateInternalState(); + } + + /** + * Checks if iterator is in the buffer zone + * @return true if iterator is in buffer zone, false otherwise + */ + boolean isInBuffer() + { + return m_bufferOffset_ != -1; + } + + /** + * Determine if a character is a Thai vowel, which sorts after its base + * consonant. + * @param ch character to test + * @return true if ch is a Thai prevowel, false otherwise + */ + static final boolean isThaiPreVowel(char ch) + { + return (ch >= 0xe40 && ch <= 0xe44) || (ch >= 0xec0 && ch <= 0xec4); + } + + /** + * Determine if a character is a Thai base consonant, which sorts before + * its prevowel + * @param ch character to test + * @return true if ch is a Thai base consonant, false otherwise + */ + static final boolean isThaiBaseConsonant(char ch) + { + return ch >= 0xe01 && ch <= 0xe2e; + } // private inner class -------------------------------------------------- @@ -675,8 +796,6 @@ public final class CollationElementIterator private static final int CE_LONG_PRIMARY_TAG_ = 12; private static final int CE_CE_TAGS_COUNT = 13; private static final int CE_BYTE_COMMON_ = 0x05; - private static final int CE_PRIMARY_SHIFT_ = 16; - private static final int CE_SECONDARY_SHIFT_ = 8; // end special ce values and tags --------------------------------------- @@ -773,21 +892,19 @@ public final class CollationElementIterator * Source offsets points to the current processing character. 
* */ - private void normalize() + private void normalize() { - /* synwee todo normalize to 1 before fcd - try { - decompose(m_buffer_, m_source_, m_FCDStart_, m_FCDLimit_, - m_collator_.m_decomposition_); - } - catch (ArrayOutOfBoundsException e) { - // increase the size of the buffer - m_buffer_ = new char[m_buffer_.length << 1]; - decompose(m_buffer_, m_source_, m_FCDStart_, m_FCDLimit_, - m_collator_.m_decomposition_); - } - */ - m_bufferOffset_ = 0; + int size = m_FCDLimit_ - m_FCDStart_; + m_buffer_.delete(0, m_buffer_.length()); + m_source_.setIndex(m_FCDStart_); + for (int i = 0; i < size; i ++) { + m_buffer_.append(m_source_.current()); + m_source_.next(); + } + String decomp = Normalizer.decompose(m_buffer_.toString(), false); + m_buffer_.delete(0, m_buffer_.length()); + m_buffer_.append(decomp); + m_bufferOffset_ = 0; } /** @@ -811,24 +928,22 @@ public final class CollationElementIterator { boolean result = true; - // srcP = collationSource->pos-1; - - // Get the trailing combining class of the current character. + // Get the trailing combining class of the current character. // If it's zero, we are OK. m_FCDStart_ = offset; m_source_.setIndex(offset); // trie access - char fcd = 0; // synwee todo: unorm_getFCD16(ch); + char fcd = NormalizerImpl.getFCD16(ch); if (fcd != 0 && UTF16.isLeadSurrogate(ch)) { ch = m_source_.next(); // CharacterIterator.DONE has 0 fcd if (UTF16.isTrailSurrogate(ch)) { - fcd = 0xFFFF; // unorm_getFCD16FromSurrogatePair(fcd, ch); + fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch); } else { fcd = 0; } } - byte prevTrailCC = (byte)(fcd & LAST_BYTE_MASK_); + int prevTrailCC = fcd & LAST_BYTE_MASK_; if (prevTrailCC != 0) { // The current char has a non-zero trailing CC. 
Scan forward until @@ -839,16 +954,16 @@ public final class CollationElementIterator break; } // trie access - fcd = 0; // unorm_getFCD16(ch); + fcd = NormalizerImpl.getFCD16(ch); if (fcd != 0 && UTF16.isLeadSurrogate(ch)) { ch = m_source_.next(); if (UTF16.isTrailSurrogate(ch)) { - fcd = 0xFFFF; // unorm_getFCD16FromSurrogatePair(fcd, ch); + fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch); } else { fcd = 0; } } - byte leadCC = (byte)(fcd >> SECOND_LAST_BYTE_SHIFT_); + int leadCC = fcd >>> SECOND_LAST_BYTE_SHIFT_; if (leadCC == 0) { // this is a base character, we stop the FCD checks break; @@ -858,12 +973,12 @@ public final class CollationElementIterator result = false; } - prevTrailCC = (byte)(fcd & LAST_BYTE_MASK_); + prevTrailCC = fcd & LAST_BYTE_MASK_; } } + m_FCDLimit_ = m_source_.getIndex(); m_source_.setIndex(m_FCDStart_); m_source_.next(); - m_FCDLimit_ = m_source_.getIndex(); return result; } @@ -885,8 +1000,7 @@ public final class CollationElementIterator } else { // we are in the buffer, buffer offset will never be 0 here - result = m_buffer_.charAt(m_bufferOffset_ ++); - if (result == 0) { + if (m_bufferOffset_ >= m_buffer_.length()) { // Null marked end of buffer, revert to the source string and // loop back to top to try again to get a character. m_source_.setIndex(m_FCDLimit_); @@ -894,10 +1008,10 @@ public final class CollationElementIterator m_buffer_.delete(0, m_buffer_.length()); return nextChar(); } - return result; + return m_buffer_.charAt(m_bufferOffset_ ++); } - if (m_collator_.m_decomposition_ == Collator.NO_DECOMPOSITION + if (m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION || m_bufferOffset_ != -1 || m_FCDLimit_ > startoffset // skip the fcd checks || result < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_ @@ -934,20 +1048,10 @@ public final class CollationElementIterator * the buffer. * Source offsets points to the current processing character. 
*/ - public void normalizeBackwards() + private void normalizeBackwards() { - int start = m_FCDStart_; - int size = 0; - /* synwee todo normalize including fcd - try { - size = decompose(m_buffer_, m_source_, start, m_FCDLimit_); - } - catch (ArrayOutOfBoundsException .) { - m_buffer_ = new char[m_buffer_.length << 1]; - size = decompose(m_buffer_, m_source_, start, m_FCDLimit); - } - */ - m_bufferOffset_ = size - 1; + normalize(); + m_bufferOffset_ = m_buffer_.length(); } /** @@ -972,18 +1076,20 @@ public final class CollationElementIterator { boolean result = true; char fcd = 0; - m_FCDLimit_ = offset; + m_FCDLimit_ = offset + 1; m_source_.setIndex(offset); if (!UTF16.isSurrogate(ch)) { - fcd = 0; // synwee todo unorm_getFCD16(fcdTrieIndex, c); + fcd = NormalizerImpl.getFCD16(ch); } else if (UTF16.isTrailSurrogate(ch) && m_FCDLimit_ > 0) { // note trail surrogate characters gets 0 fcd + char trailch = ch; ch = m_source_.previous(); if (UTF16.isLeadSurrogate(ch)) { - fcd = 0; // unorm_getFCD16(fcdTrieIndex, c2); + fcd = NormalizerImpl.getFCD16(ch); if (fcd != 0) { - fcd = 0; // unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c); + fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, + trailch); } } else { @@ -991,44 +1097,47 @@ public final class CollationElementIterator } } - byte leadCC = (byte)(fcd >> SECOND_LAST_BYTE_SHIFT_); - if (leadCC != 0) { - // The current char has a non-zero leading combining class. - // Scan backward until we find a char with a trailing cc of zero. - while (true) { - if (m_source_.getIndex() == 0) { - break; - } + int leadCC = fcd >>> SECOND_LAST_BYTE_SHIFT_; + // The current char has a non-zero leading combining class. + // Scan backward until we find a char with a trailing cc of zero. 
+ + while (leadCC != 0) { + offset = m_source_.getIndex(); + if (offset == 0) { + break; + } + ch = m_source_.previous(); + if (!UTF16.isSurrogate(ch)) { + fcd = NormalizerImpl.getFCD16(ch); + } + else if (UTF16.isTrailSurrogate(ch) && m_source_.getIndex() > 0) { + char trail = ch; ch = m_source_.previous(); - if (!UTF16.isSurrogate(ch)) { - fcd = 0; //unorm_getFCD16(fcdTrieIndex, c); - } - else { - if (UTF16.isTrailSurrogate(ch) && m_source_.getIndex() > 0) - { - ch = m_source_.previous(); - if (UTF16.isLeadSurrogate(ch)) { - fcd = 0; // unorm_getFCD16(fcdTrieIndex, c2); - } - if (fcd != 0) { - fcd = 0; // unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c); - } - } else { - fcd = 0; // unpaired surrogate - } - byte prevTrailCC = (byte)(fcd & LAST_BYTE_MASK_); - if (prevTrailCC == 0) { - break; - } - - if (leadCC < prevTrailCC) { - result = false; - } - leadCC = (byte)(fcd >> SECOND_LAST_BYTE_SHIFT_); - } - } + if (UTF16.isLeadSurrogate(ch)) { + fcd = NormalizerImpl.getFCD16(ch); + } + if (fcd != 0) { + fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, trail); + } + } + else { + fcd = 0; // unpaired surrogate + } + int prevTrailCC = fcd & LAST_BYTE_MASK_; + if (leadCC < prevTrailCC) { + result = false; + } + leadCC = fcd >>> SECOND_LAST_BYTE_SHIFT_; } - m_FCDStart_ = m_source_.getIndex(); // character with 0 lead/trail fcd + + // storing character with 0 lead fcd or the 1st accent with a base + // character before it + if (fcd == 0) { + m_FCDStart_ = offset; + } + else { + m_FCDStart_ = m_source_.getIndex(); + } m_source_.setIndex(m_FCDLimit_); return result; } @@ -1062,7 +1171,7 @@ public final class CollationElementIterator char result = m_source_.previous(); int startoffset = m_source_.getIndex(); if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ - || m_collator_.m_decomposition_ == Collator.NO_DECOMPOSITION + || m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION || m_FCDStart_ <= startoffset || m_source_.getIndex() == 0) { return result; } @@ 
-1073,7 +1182,7 @@ public final class CollationElementIterator return result; } // Need a more complete FCD check and possible normalization. - if (!FCDCheckBackwards(ch, startoffset)) { + if (!FCDCheckBackwards(result, startoffset)) { normalizeBackwards(); m_bufferOffset_ --; result = m_buffer_.charAt(m_bufferOffset_); @@ -1085,52 +1194,17 @@ public final class CollationElementIterator * Determines if it is at the start of source iteration * @return true if iterator at the start, false otherwise */ - private boolean isBackwardsStart() + private final boolean isBackwardsStart() { return (m_bufferOffset_ < 0 && m_source_.getIndex() == 0) || (m_bufferOffset_ == 0 && m_FCDStart_ <= 0); } - /** - * Determine if a character is a Thai vowel, which sorts after its base - * consonant. - * @param ch character to test - * @return true if ch is a Thai prevowel, false otherwise - */ - private boolean isThaiPreVowel(char ch) - { - return (ch >= 0xe40 && ch <= 0xe44) || (ch >= 0xec0 && ch <= 0xec4); - } - - /** - * Determine if a character is a Thai base consonant, which sorts before - * its prevowel - * @param ch character to test - * @return true if ch is a Thai base consonant, false otherwise - */ - private boolean isThaiBaseConsonant(char ch) - { - return ch >= 0xe01 && ch <= 0xe2e; - } - - - /** - * Determine if a character is a Jamo - * @param ch character to test - * @return true if ch is a Jamo, false otherwise - */ - private boolean isJamo(char ch) - { - return (ch - 0x1100 <= 0x1112 - 0x1100) - || (ch - 0x1161 <= 0x1175 - 0x1161) - || (ch - 0x11A8 <= 0x11C2 - 0x11A8); - } - /** * Checks if iterator is at the end of its source string. 
* @return true if it is at the end, false otherwise */ - private boolean isEnd() + private final boolean isEnd() { if (m_bufferOffset_ >= 0) { if (m_bufferOffset_ != m_buffer_.length()) { @@ -1155,7 +1229,8 @@ public final class CollationElementIterator * @param trail character * @return next CE for the surrogate characters */ - private int nextSurrogate(RuleBasedCollator collator, int ce, char trail) + private final int nextSurrogate(RuleBasedCollator collator, int ce, + char trail) { if (!UTF16.isTrailSurrogate(trail)) { updateInternalState(m_backup_); @@ -1188,7 +1263,7 @@ public final class CollationElementIterator * @param ch current character * @return next CE for Thai characters */ - private int nextThai(RuleBasedCollator collator, int ce, char ch) + private int nextThai(RuleBasedCollator collator, int ce, char ch) { if (m_bufferOffset_ != -1 // already swapped || isEnd() || !isThaiBaseConsonant(m_source_.current())) { @@ -1430,6 +1505,7 @@ public final class CollationElementIterator * @param collator collator to use * @param ce current ce * @param entrybackup entry backup iterator status + * @return ce of the next contraction */ private int nextContraction(RuleBasedCollator collator, int ce) { @@ -1895,7 +1971,7 @@ public final class CollationElementIterator return collator.m_contractionCE_[entryoffset]; } StringBuffer buffer = new StringBuffer(); - while (collator.isUnsafe(ch)) { + while (collator.isUnsafe(ch) || isThaiBaseConsonant(ch)) { buffer.insert(0, ch); ch = previousChar(); if (isBackwardsStart()) { diff --git a/icu4j/src/com/ibm/icu/text/CollationKey.java b/icu4j/src/com/ibm/icu/text/CollationKey.java index f3b2480b58d..48e74f46b07 100755 --- a/icu4j/src/com/ibm/icu/text/CollationKey.java +++ b/icu4j/src/com/ibm/icu/text/CollationKey.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollationKey.java,v $ -* $Date: 2002/05/16 20:04:49 $ -* 
$Revision: 1.5 $ +* $Date: 2002/06/21 23:56:44 $ +* $Revision: 1.6 $ * ******************************************************************************* */ @@ -15,28 +15,42 @@ package com.ibm.icu.text; import java.util.Arrays; /** - *A CollationKey
represents a String
under the
+ *
+ * A CollationKey
represents a String
under the
* rules of a specific Collator
object. Comparing two
* CollationKey
s returns the relative order of the
- * String
s they represent. Using CollationKey
s to
- * compare String
s is generally faster than using
- * Collator.compare
. Thus, when the String
s must be
- * compared multiple times, for example when sorting a list of
- * String
s. It's more efficient to use CollationKey
s.
+ * String
s they represent.
+ *
+ * CollationKey
instances can not be create directly. Rather,
+ * they are generated by calling Collator.getCollationKey(String)
.
+ * Since the rule set of each Collator differs
, the sort orders of
+ * the same string under two unique Collator
may not be the same.
+ * Hence comparing CollationKey
s generated from different
+ * Collator
objects may not give the right results.
+ *
+ * Similar to CollationKey.compareTo(CollationKey)
,
+ * the method RuleBasedCollator.compare(String, String)
compares
+ * two strings and returns the relative order. During the construction
+ * of a CollationKey
object, the entire source string is examined
+ * and processed into a series of bits that are stored in the
+ * CollationKey
object. Bitwise comparison on the bit sequences
+ * are then performed during CollationKey.compareTo(CollationKey)
.
+ * This comparison could incurr expensive startup costs while creating
+ * the CollationKey
object, but once the objects are created,
+ * binary comparisons are fast, and is recommended when the same strings are
+ * to be compared over and over again.
+ * On the other hand Collator.compare(String, String)
examines
+ * and processes the string only until the first characters differing in order,
+ * and is recommend for use if the String
s are to be compared only
+ * once.
+ *
+ * Details of the composition of the bit sequence is located at + * + * user guide. *
- *You can not create CollationKey
s directly. Rather, generate
- * them by calling Collator.getCollationKey(String)
. You can only
- * compare CollationKey
s generated from the same
- * Collator
object.
Generating a CollationKey
for a String
- * involves examining the entire String
and converting it to
- * series of bits that can be compared bitwise. This allows fast comparisons
- * once the keys are generated. The cost of generating keys is recouped in
- * faster comparisons when String
s need to be compared many
- * times. On the other hand, the result of a comparison is often determined by
- * the first couple of characters of each String
.
- * Collator.compare(String, String)
examines only as many characters as it needs
- * which allows it to be faster when doing single comparisons.
The following example shows how CollationKey
s might be used
* to sort a list of String
s.
@@ -63,7 +77,7 @@ import java.util.Arrays; * System.out.println( keys[2].getSourceString() ); * *- * + * * @see Collator * @see RuleBasedCollator * @author Syn Wee Quek @@ -77,7 +91,7 @@ public final class CollationKey implements Comparable // public getters ------------------------------------------------------- /** - * Returns the String that this CollationKey represents. + * Returns the source string that this CollationKey represents. * @return source string that this CollationKey represents * @draft 2.2 */ @@ -87,11 +101,44 @@ public final class CollationKey implements Comparable } /** - *
Duplicates and returns the value of this CollationKey as a sequence - * of big-endian bytes.
- *If two CollationKeys could be legitimately compared, then one could - * compare the byte arrays of each to obtain the same result.
- * @return CollationKey value in a sequence of big-endian byte bytes. + *+ * Duplicates and returns the value of this CollationKey as a sequence + * of big-endian bytes terminated by a null. + *
+ *+ * If two CollationKeys could be legitimately compared, then one could + * compare the byte arrays of each to obtain the same result. + *
+ * byte key1[] = collationkey1.toByteArray(); + * byte key2[] = collationkey2.toByteArray(); + * int i = 0; + * while (key1[i] != 0 && key2[i] != 0) { + * int key = key1[i] & 0xFF; + * int targetkey = key2[i] & 0xFF; + * if (key < targetkey) { + * System.out.println("String 1 is less than string 2"); + * return; + * } + * if (targetkey < key) { + * System.out.println("String 1 is more than string 2"); + * } + * i ++; + * } + * int key = key1[i] & 0xFF; + * int targetkey = key2[i] & 0xFF; + * if (key < targetkey) { + * System.out.println("String 1 is less than string 2"); + * return; + * } + * if (targetkey < key) { + * System.out.println("String 1 is more than string 2"); + * return; + * } + * System.out.println("String 1 is equals to string 2");; + *+ * + * @return CollationKey value in a sequence of big-endian byte bytes + * terminated by a null. * @draft 2.2 */ public byte[] toByteArray() @@ -112,15 +159,22 @@ public final class CollationKey implements Comparable // public other methods ------------------------------------------------- /** - *
Compare this CollationKey to the target CollationKey. The collation - * rules of the Collator object which created these keys are applied.
- *Note: CollationKeys created by different Collators - * can not be compared.
+ *+ * Compare this CollationKey to the argument target CollationKey. + * The collation + * rules of the Collator object which created these keys are applied. + *
+ *+ * Note: Comparison between CollationKeys created by + * different Collators may not return the correct result. See class + * documentation. + *
* @param target target CollationKey * @return an integer value, if value is less than zero this CollationKey * is less than than target, if value is zero if they are equal * and value is greater than zero if this CollationKey is greater * than target. + * @exception NullPointerException thrown when argument is null. * @see Collator#compare(String, String) * @draft 2.2 */ @@ -151,13 +205,21 @@ public final class CollationKey implements Comparable } /** - *Compares this CollationKey with the specified Object.
+ *+ * Compares this CollationKey with the specified Object. + * The collation + * rules of the Collator object which created these objects are applied. + *
+ *+ * See note in compareTo(CollationKey) for warnings of incorrect results + *
* @param obj the Object to be compared. * @return Returns a negative integer, zero, or a positive integer * respectively if this CollationKey is less than, equal to, or * greater than the given Object. - * @exception ClassCastException thrown when the specified Object is not a - * CollationKey. + * @exception ClassCastException thrown when the specified argument is not + * a CollationKey. NullPointerException thrown when argument + * is null. * @see #compareTo(CollationKey) * @draft 2.2 */ @@ -167,22 +229,52 @@ public final class CollationKey implements Comparable } /** - *Compare this CollationKey and the target CollationKey for equality. + *
+ * Compare this CollationKey and the argument target object for equality. + * The collation + * rules of the Collator object which created these objects are applied. *
- *The collation rules of the Collator object which created these keys - * are applied.
- *Note: CollationKeys created by different Collators - * can not be compared.
- * @param target the CollationKey to compare to. + *+ * See note in compareTo(CollationKey) for warnings of incorrect results + *
+ * @param target the object to compare to. * @return true if two objects are equal, false otherwise. + * @see #compareTo(CollationKey) + * @exception ClassCastException thrown when the specified argument is not + * a CollationKey. NullPointerException thrown when argument + * is null. * @draft 2.2 */ public boolean equals(Object target) + { + if (!(target instanceof CollationKey)) { + return false; + } + + return equals((CollationKey)target); + } + + /** + *+ * Compare this CollationKey and the argument target CollationKey for + * equality. + * The collation + * rules of the Collator object which created these objects are applied. + *
+ *+ * See note in compareTo(CollationKey) for warnings of incorrect results + *
+ * @param target the CollationKey to compare to. + * @return true if two objects are equal, false otherwise. + * @exception NullPointerException thrown when argument is null. + * @draft 2.2 + */ + public boolean equals(CollationKey target) { if (this == target) { return true; } - if (target == null || !(target instanceof CollationKey)) { + if (target == null) { return false; } CollationKey other = (CollationKey)target; @@ -200,12 +292,13 @@ public final class CollationKey implements Comparable } /** - *Creates a hash code for this CollationKey. The hash value is - * calculated on the key itself, not the String from which the key was - * created. Thus if x and y are CollationKeys, then - * x.hashCode(x) == y.hashCode() if x.equals(y) is true. This allows - * language-sensitive comparison in a hash table.
- *See the CollatinKey class description for an example.
+ *+ * Creates a hash code for this CollationKey. The hash value is calculated + * on the key itself, not the String from which the key was created. Thus + * if x and y are CollationKeys, then x.hashCode(x) == y.hashCode() + * if x.equals(y) is true. This allows language-sensitive comparison in a + * hash table. + *
* @return the hash value. * @draft 2.2 */ diff --git a/icu4j/src/com/ibm/icu/text/CollationParsedRuleBuilder.java b/icu4j/src/com/ibm/icu/text/CollationParsedRuleBuilder.java new file mode 100644 index 00000000000..8276a24f357 --- /dev/null +++ b/icu4j/src/com/ibm/icu/text/CollationParsedRuleBuilder.java @@ -0,0 +1,3487 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2002, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollationParsedRuleBuilder.java,v $ +* $Date: 2002/06/21 23:57:55 $ +* $Revision: 1.1 $ +* +******************************************************************************* +*/ +package com.ibm.icu.text; + +import java.io.InputStream; +import java.io.BufferedInputStream; +import java.text.ParseException; +import java.util.Hashtable; +import java.util.Vector; +import java.util.Arrays; +import java.util.Enumeration; + +import com.ibm.icu.dev.test.lang.UCharacterCaseTest; +import com.ibm.icu.impl.TrieBuilder; +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.impl.NormalizerImpl; +import com.ibm.icu.util.RangeValueIterator; + +/** +* Class for building a collator from a list of collation rules. 
+* This class is uses CollationRuleParser +* @author Syn Wee Quek +* @since release 2.2, June 11 2002 +* @draft 2.2 +*/ +class CollationParsedRuleBuilder +{ + // package private constructors ------------------------------------------ + + /** + * Constructor + * @param rules collation rules + * @exception ParseException thrown when argument rules have an invalid + * syntax + */ + CollationParsedRuleBuilder(String rules) throws ParseException + { + m_parser_ = new CollationRuleParser(rules); + m_utilColEIter_ = RuleBasedCollator.UCA_.getCollationElementIterator( + ""); + } + + // package private inner classes ----------------------------------------- + + /** + * Inverse UCA wrapper + */ + static class InverseUCA + { + // package private constructor --------------------------------------- + + InverseUCA() + { + } + + // package private data member --------------------------------------- + + /** + * Array list of characters + */ + int m_table_[]; + /** + * Array list of continuation characters + */ + char m_continuations_[]; + + // package private method -------------------------------------------- + + /** + * Returns the previous inverse ces of the argument ces + * @param ce ce to test + * @param contce continuation ce to test + * @param strength collation strength + * @param result an array to store the return results of inverse ce, + * previous inverse ce and previous inverse continuation ce + */ + final void getInversePrevCE(int ce, int contce, int strength, + int result[]) + { + int ice = findInverseCE(ce, contce); + + if (ice < 0) { + result[0] = -1; + result[1] = CollationElementIterator.NULLORDER; + return; + } + + ce &= STRENGTH_MASK_[strength]; + contce &= STRENGTH_MASK_[strength]; + + result[1] = ce; + result[2] = contce; + + while ((result[1] & STRENGTH_MASK_[strength]) == ce + && (result[2] & STRENGTH_MASK_[strength])== contce + && ice > 0) { + // this condition should prevent falling off the edge of the + // world + // here, we end up in a singularity - 
zero + result[1] = m_table_[3 * (-- ice)]; + result[2] = m_table_[3 * ice + 1]; + } + } + + /** + * Finding the inverse CE of the argument CEs + * @param ce CE to be tested + * @param contce continuation CE + * @return inverse CE + */ + int findInverseCE(int ce, int contce) + { + int bottom = 0; + int top = m_table_.length / 3; + int result = 0; + + while (bottom < top - 1) { + result = (top + bottom) >> 1; + int first = m_table_[3 * result]; + int second = m_table_[3 * result + 1]; + if (first > ce) { + top = result; + } + else if (first < ce) { + bottom = result; + } + else { + if (second > contce) { + top = result; + } + else if (second < contce) { + bottom = result; + } + else { + break; + } + } + } + + return result; + } + + /** + * Getting gap offsets in the inverse UCA + * @param listheader parsed token lists + * @exception Exception thrown when error occurs while finding the + * collation gaps + */ + void getInverseGapPositions(CollationRuleParser.TokenListHeader + listheader) + throws Exception + { + // reset all the gaps + CollationRuleParser.Token token = listheader.m_first_; + int tokenstrength = token.m_strength_; + + for (int i = 0; i < 3; i ++) { + listheader.m_gapsHi_[3 * i] = 0; + listheader.m_gapsHi_[3 * i + 1] = 0; + listheader.m_gapsHi_[3 * i + 2] = 0; + listheader.m_gapsLo_[3 * i] = 0; + listheader.m_gapsLo_[3 * i + 1] = 0; + listheader.m_gapsLo_[3 * i + 2] = 0; + listheader.m_numStr_[i] = 0; + listheader.m_fStrToken_[i] = null; + listheader.m_lStrToken_[i] = null; + listheader.m_pos_[i] = -1; + } + + if (listheader.m_baseCE_ >= CE_PRIMARY_IMPLICIT_MIN_ + && listheader.m_baseCE_ < CE_PRIMARY_IMPLICIT_MAX_) { + // implicits - + listheader.m_pos_[0] = 0; + int t1 = listheader.m_baseCE_; + int t2 = listheader.m_baseContCE_; + listheader.m_gapsLo_[0] = mergeCE(t1, t2, + Collator.PRIMARY); + listheader.m_gapsLo_[1] = mergeCE(t1, t2, + Collator.SECONDARY); + listheader.m_gapsLo_[2] = mergeCE(t1, t2, + Collator.TERTIARY); + if (listheader.m_baseCE_ < 
0xEF000000) { + // first implicits have three byte primaries, with a gap of + // one so we esentially need to add 2 to the top byte in + // listheader.m_baseContCE_ + t2 += 0x02000000; + } + else { + // second implicits have four byte primaries, with a gap of + // IMPLICIT_LAST2_MULTIPLIER_ + // Now, this guy is not really accessible here, so until we + // find a better way to pass it around, assume that the gap is 1 + t2 += 0x00020000; + } + listheader.m_gapsHi_[0] = mergeCE(t1, t2, + Collator.PRIMARY); + listheader.m_gapsHi_[1] = mergeCE(t1, t2, + Collator.SECONDARY); + listheader.m_gapsHi_[2] = mergeCE(t1, t2, + Collator.TERTIARY); + } + else if (listheader.m_indirect_ == true + && listheader.m_nextCE_ != 0) { + listheader.m_pos_[0] = 0; + int t1 = listheader.m_baseCE_; + int t2 = listheader.m_baseContCE_; + listheader.m_gapsLo_[0] = mergeCE(t1, t2, + Collator.PRIMARY); + listheader.m_gapsLo_[1] = mergeCE(t1, t2, + Collator.SECONDARY); + listheader.m_gapsLo_[2] = mergeCE(t1, t2, + Collator.TERTIARY); + t1 = listheader.m_nextCE_; + t2 = listheader.m_nextContCE_; + listheader.m_gapsHi_[0] = mergeCE(t1, t2, + Collator.PRIMARY); + listheader.m_gapsHi_[1] = mergeCE(t1, t2, + Collator.SECONDARY); + listheader.m_gapsHi_[2] = mergeCE(t1, t2, + Collator.TERTIARY); + } + else { + while (true) { + if (tokenstrength < CE_STRENGTH_LIMIT_) { + listheader.m_pos_[tokenstrength] + = getInverseNext(listheader, + tokenstrength); + if (listheader.m_pos_[tokenstrength] >= 0) { + listheader.m_fStrToken_[tokenstrength] = token; + } + else { + // The CE must be implicit, since it's not in the + // table + // Error + throw new Exception("Internal program error"); + } + } + + while (token != null && token.m_strength_ >= tokenstrength) + { + if (tokenstrength < CE_STRENGTH_LIMIT_) { + listheader.m_lStrToken_[tokenstrength] = token; + } + token = token.m_next_; + } + if (tokenstrength < CE_STRENGTH_LIMIT_ - 1) { + // check if previous interval is the same and merge the + // intervals if it 
is so + if (listheader.m_pos_[tokenstrength] + == listheader.m_pos_[tokenstrength + 1]) { + listheader.m_fStrToken_[tokenstrength] + = listheader.m_fStrToken_[tokenstrength + + 1]; + listheader.m_fStrToken_[tokenstrength + 1] = null; + listheader.m_lStrToken_[tokenstrength + 1] = null; + listheader.m_pos_[tokenstrength + 1] = -1; + } + } + if (token != null) { + tokenstrength = token.m_strength_; + } + else { + break; + } + } + for (int st = 0; st < 3; st ++) { + int pos = listheader.m_pos_[st]; + if (pos >= 0) { + int t1 = m_table_[3 * pos]; + int t2 = m_table_[3 * pos + 1]; + listheader.m_gapsHi_[3 * st] = mergeCE(t1, t2, + Collator.PRIMARY); + listheader.m_gapsHi_[3 * st + 1] = mergeCE(t1, t2, + Collator.SECONDARY); + listheader.m_gapsHi_[3 * st + 2] = (t1 & 0x3f) << 24 + | (t2 & 0x3f) << 16; + pos --; + t1 = m_table_[3 * pos]; + t2 = m_table_[3 * pos + 1]; + listheader.m_gapsLo_[3 * st] = mergeCE(t1, t2, + Collator.PRIMARY); + listheader.m_gapsLo_[3 * st + 1] = mergeCE(t1, t2, + Collator.SECONDARY); + listheader.m_gapsLo_[3 * st + 2] = (t1 & 0x3f) << 24 + | (t2 & 0x3f) << 16; + } + } + } + } + + /** + * Gets the next CE in the inverse table + * @param listheader token list header + * @param strength collation strength + * @return next ce + */ + private final int getInverseNext(CollationRuleParser.TokenListHeader + listheader, + int strength) + { + int ce = listheader.m_baseCE_; + int secondce = listheader.m_baseContCE_; + int result = findInverseCE(ce, secondce); + + if (result < 0) { + return -1; + } + + ce &= STRENGTH_MASK_[strength]; + secondce &= STRENGTH_MASK_[strength]; + + int nextce = ce; + int nextcontce = secondce; + + while((nextce & STRENGTH_MASK_[strength]) == ce + && (nextcontce & STRENGTH_MASK_[strength]) == secondce) { + nextce = m_table_[3 * (++ result)]; + nextcontce = m_table_[3 * result + 1]; + } + + listheader.m_nextCE_ = nextce; + listheader.m_nextContCE_ = nextcontce; + + return result; + } + } + + // package private data members 
------------------------------------------ + + /** + * Inverse UCA, instantiate only when required + */ + static final InverseUCA INVERSE_UCA_; + + /** + * Initializing the inverse UCA + */ + static { + try + { + String invdat = "/com/ibm/icu/impl/data/invuca.dat"; + InputStream i = invdat.getClass().getResourceAsStream(invdat); + BufferedInputStream b = new BufferedInputStream(i, 110000); + INVERSE_UCA_ = CollatorReader.readInverseUCA(b); + b.close(); + i.close(); + } + catch (Exception e) + { + e.printStackTrace(); + throw new RuntimeException(e.getMessage()); + } + } + + // package private methods ----------------------------------------------- + + /** + * Parse and sets the collation rules in the argument collator + * @param collator to set + * @exception Exception thrown when internal program error occurs + */ + void setRules(RuleBasedCollator collator) throws Exception + { + if (m_parser_.m_resultLength_ > 0) { + // we have a set of rules, let's make something of it + assembleTailoringTable(collator); + } + else { // no rules, but no error either must be only options + // We will init the collator from UCA + collator.setWithUCATables(); + // And set only the options + m_parser_.setDefaultOptionsInCollator(collator); + } + } + + /** + * 2. Eliminate the negative lists by doing the following for each + * non-null negative list: + * o if previousCE(baseCE, strongestN) != some ListHeader X's baseCE, + * create new ListHeader X + * o reverse the list, add to the end of X's positive list. Reset the + * strength of the first item you add, based on the stronger strength + * levels of the two lists. + * + * 3. For each ListHeader with a non-null positive list: + * o Find all character strings with CEs between the baseCE and the + * next/previous CE, at the strength of the first token. Add these to the + * tailoring. + * ? That is, if UCA has ... x <<< X << x' <<< X' < y ..., and the + * tailoring has & x < z... + * ? 
Then we change the tailoring to & x <<< X << x' <<< X' < z ... + * + * It is possible that this part should be done even while constructing list + * The problem is that it is unknown what is going to be the strongest + * weight. + * So we might as well do it here + * o Allocate CEs for each token in the list, based on the total number N + * of the largest level difference, and the gap G between baseCE and nextCE + * at that level. The relation * between the last item and nextCE is the + * same as the strongest strength. + * o Example: baseCE < a << b <<< q << c < d < e * nextCE(X,1) + * ? There are 3 primary items: a, d, e. Fit them into the primary gap. + * Then fit b and c into the secondary gap between a and d, then fit q + * into the tertiary gap between b and c. + * o Example: baseCE << b <<< q << c * nextCE(X,2) + * ? There are 2 secondary items: b, c. Fit them into the secondary gap. + * Then fit q into the tertiary gap between b and c. + * o When incrementing primary values, we will not cross high byte + * boundaries except where there is only a single-byte primary. That is + * to ensure that the script reordering will continue to work. 
+ * @param collator the rule based collator to update + * @exception Exception thrown when internal program error occurs + */ + void assembleTailoringTable(RuleBasedCollator collator) throws Exception + { + + for (int i = 0; i < m_parser_.m_resultLength_; i ++) { + // now we need to generate the CEs + // We stuff the initial value in the buffers, and increase the + // appropriate buffer according to strength */ + initBuffers(m_parser_.m_listHeader_[i]); + } + + if (m_parser_.m_variableTop_ != null) { + // stuff the variable top value + m_parser_.m_options_.m_variableTopValue_ + = m_parser_.m_variableTop_.m_CE_[0] >>> 16; + // remove it from the list + if (m_parser_.m_variableTop_.m_listHeader_.m_first_ + == m_parser_.m_variableTop_) { // first in list + m_parser_.m_variableTop_.m_listHeader_.m_first_ + = m_parser_.m_variableTop_.m_next_; + } + if (m_parser_.m_variableTop_.m_listHeader_.m_last_ + == m_parser_.m_variableTop_) { + // first in list + m_parser_.m_variableTop_.m_listHeader_.m_last_ + = m_parser_.m_variableTop_.m_previous_; + } + if (m_parser_.m_variableTop_.m_next_ != null) { + m_parser_.m_variableTop_.m_next_.m_previous_ + = m_parser_.m_variableTop_.m_previous_; + } + if (m_parser_.m_variableTop_.m_previous_ != null) { + m_parser_.m_variableTop_.m_previous_.m_next_ + = m_parser_.m_variableTop_.m_next_; + } + } + + + BuildTable t = new BuildTable(m_parser_); + + // After this, we have assigned CE values to all regular CEs now we + // will go through list once more and resolve expansions, make + // UCAElements structs and add them to table + for (int i = 0; i < m_parser_.m_resultLength_; i ++) { + // now we need to generate the CEs + // We stuff the initial value in the buffers, and increase the + // appropriate buffer according to strength */ + createElements(t, m_parser_.m_listHeader_[i]); + } + + Elements el = new Elements(); + el.m_isThai_ = false; + el.m_prefixChars_ = null; + int ce[] = new int[256]; + StringBuffer str = new StringBuffer(); + + // 
add latin-1 stuff + for (char u = 0; u < 0x100; u ++) { + // if ((CE = ucmpe32_get(t.m_mapping, u)) == UCOL_NOT_FOUND + int CE = TrieBuilder.get32(t.m_mapping_, (int)u); + if (CE == CE_NOT_FOUND_ + // this test is for contractions that are missing the starting + // element. Looks like latin-1 should be done before + // assembling the table, even if it results in more false + // closure elements + || (isContractionTableElement(CE) + && getCE(t.m_contractions_, CE, 0) == CE_NOT_FOUND_)) { + str.delete(0, str.length()); + str.append(u); + el.m_uchars_ = str.toString(); + el.m_cPoints_ = el.m_uchars_; + el.m_prefix_ = 0; + int ceoffset = 0; + m_utilColEIter_.setText(el.m_uchars_); + while (CE != CollationElementIterator.NULLORDER) { + CE = m_utilColEIter_.next(); + if (CE != CollationElementIterator.NULLORDER) { + ce[ceoffset ++] = CE; + } + } + el.m_CEs_ = new int[ceoffset]; + System.arraycopy(ce, 0, el.m_CEs_, 0, ceoffset); + addAnElement(t, el); + } + } + + // copy contractions from the UCA - this is felt mostly for cyrillic + char conts[] = RuleBasedCollator.UCA_.m_UCAContraction_; + int offset = 0; + while (conts[offset] != 0) { + // tailoredCE = ucmpe32_get(t.m_mapping, *conts); + int tailoredCE = TrieBuilder.get32(t.m_mapping_, conts[offset]); + if (tailoredCE != CE_NOT_FOUND_) { + boolean needToAdd = true; + if (isContractionTableElement(tailoredCE)) { + if (isTailored(t.m_contractions_, tailoredCE, + conts, offset + 1) == true) { + needToAdd = false; + } + } + if (needToAdd == true) { + // we need to add if this contraction is not tailored. 
+ el.m_prefix_ = 0; + el.m_prefixChars_ = null; + el.m_cPoints_ = el.m_uchars_; + str.delete(0, str.length()); + str.append(conts[offset]); + str.append(conts[offset + 1]); + if (conts[offset + 2] != 0) { + str.append(conts[offset + 2]); + } + el.m_uchars_ = str.toString(); + int ceoffset = 0; + m_utilColEIter_.setText(el.m_uchars_); + while (true) { + int CE = m_utilColEIter_.next(); + if (CE != CollationElementIterator.NULLORDER) { + ce[ceoffset ++] = CE; + } + else { + break; + } + } + el.m_CEs_ = new int[ceoffset]; + System.arraycopy(ce, 0, el.m_CEs_, 0, ceoffset); + addAnElement(t, el); + } + } + offset += 3; + } + + BuildTable temp = new BuildTable(t); + assembleTable(temp, temp.m_collator_); + // produce canonical closure + CollationElementIterator coleiter + = temp.m_collator_.getCollationElementIterator(""); + RangeValueIterator typeiter = UCharacter.getTypeIterator(); + RangeValueIterator.Element element = new RangeValueIterator.Element(); + while (typeiter.next(element)) { + _enumCategoryRangeClosureCategory(t, temp.m_collator_, coleiter, + element.start, element.limit, + element.value); + } + // still need to produce compatibility closure + assembleTable(t, collator); + } + + // private inner classes ------------------------------------------------- + + private static class CEGenerator + { + // package private data members -------------------------------------- + + WeightRange m_ranges_[]; + int m_rangesLength_; + int m_byteSize_; + int m_start_; + int m_limit_; + int m_maxCount_; + int m_count_; + int m_current_; + int m_fLow_; // forbidden Low + int m_fHigh_; // forbidden High + + // package private constructor --------------------------------------- + + CEGenerator() + { + m_ranges_ = new WeightRange[7]; + for (int i = 6; i >= 0; i --) { + m_ranges_[i] = new WeightRange(); + } + } + }; + + private static class WeightRange implements Comparable + { + // public methods ---------------------------------------------------- + + /** + * Compares this 
object with target + * @param target object to compare with + * @return 0 if equals, 1 if this is > target, -1 otherwise + */ + public int compareTo(Object target) + { + if (this == target) { + return 0; + } + int tstart = ((WeightRange)target).m_start_; + if (m_start_ == tstart) { + return 0; + } + if (m_start_ > tstart) { + return 1; + } + return -1; + } + + // package private data members -------------------------------------- + + int m_start_; + int m_end_; + int m_length_; + int m_count_; + int m_length2_; + int m_count2_; + + // package private constructor --------------------------------------- + + WeightRange() + { + m_start_ = 0; + m_end_ = 0; + m_length_ = 0; + m_count_ = 0; + m_length2_ = 0; + m_count2_ = 0; + } + }; + + private static class MaxJamoExpansionTable + { + // package private data members -------------------------------------- + + Vector m_endExpansionCE_; + // vector of booleans + Vector m_isV_; + byte m_maxLSize_; + byte m_maxVSize_; + byte m_maxTSize_; + + // package private constructor --------------------------------------- + + MaxJamoExpansionTable() + { + m_endExpansionCE_ = new Vector(); + m_isV_ = new Vector(); + m_endExpansionCE_.add(new Integer(0)); + m_isV_.add(new Integer(0)); + m_maxLSize_ = 1; + m_maxVSize_ = 1; + m_maxTSize_ = 1; + } + + MaxJamoExpansionTable(MaxJamoExpansionTable table) + { + m_endExpansionCE_ = (Vector)table.m_endExpansionCE_.clone(); + m_isV_ = (Vector)table.m_isV_.clone(); + m_maxLSize_ = table.m_maxLSize_; + m_maxVSize_ = table.m_maxVSize_; + m_maxTSize_ = table.m_maxTSize_; + } + }; + + private static class MaxExpansionTable + { + // package private constructor -------------------------------------- + + MaxExpansionTable() + { + m_endExpansionCE_ = new Vector(); + m_expansionCESize_ = new Vector(); + m_endExpansionCE_.add(new Integer(0)); + m_expansionCESize_.add(new Byte((byte)0)); + } + + MaxExpansionTable(MaxExpansionTable table) + { + m_endExpansionCE_ = (Vector)table.m_endExpansionCE_.clone(); + 
m_expansionCESize_ = (Vector)table.m_expansionCESize_.clone(); + } + + // package private data member -------------------------------------- + + Vector m_endExpansionCE_; + Vector m_expansionCESize_; + }; + + private static class BasicContractionTable + { + // package private constructors ------------------------------------- + + BasicContractionTable() + { + m_CEs_ = new Vector(); + m_codePoints_ = new StringBuffer(); + } + + // package private data members ------------------------------------- + + StringBuffer m_codePoints_; + Vector m_CEs_; + }; + + private static class ContractionTable + { + // package private constructor -------------------------------------- + + /** + * Builds a contraction table + * @param buildtable + */ + ContractionTable(TrieBuilder.BuildTable mapping) + { + m_mapping_ = mapping; + m_elements_ = new Vector(); + m_CEs_ = new Vector(); + m_codePoints_ = new StringBuffer(); + m_offsets_ = new Vector(); + m_currentTag_ = CE_NOT_FOUND_TAG_; + } + + /** + * Copies a contraction table. + * Not all data will be copied into their own object. 
+ * @param table + */ + ContractionTable(ContractionTable table) + { + m_mapping_ = table.m_mapping_; + m_elements_ = (Vector)table.m_elements_.clone(); + m_codePoints_ = new StringBuffer(table.m_codePoints_.toString()); + m_CEs_ = (Vector)table.m_CEs_.clone(); + m_offsets_ = (Vector)table.m_offsets_.clone(); + m_currentTag_ = table.m_currentTag_; + } + + // package private data members ------------------------------------ + + /** + * Vector of BasicContractionTable + */ + Vector m_elements_; + TrieBuilder.BuildTable m_mapping_; + StringBuffer m_codePoints_; + Vector m_CEs_; + Vector m_offsets_; + int m_currentTag_; + }; + + private static class BuildTable + { + // package private constructor -------------------------------------- + + /** + * Returns a table + * @return build table + */ + BuildTable(CollationRuleParser parser) + { + m_collator_ = new RuleBasedCollator(); + m_collator_.setWithUCAData(); + MaxExpansionTable maxet = new MaxExpansionTable(); + MaxJamoExpansionTable maxjet = new MaxJamoExpansionTable(); + m_options_ = parser.m_options_; + m_expansions_ = new Vector(); + // Do your own mallocs for the structure, array and have linear + // Latin 1 + m_mapping_ = TrieBuilder.open(null, null, 0x100000, + RuleBasedCollator.CE_SPECIAL_FLAG_ + | (CE_NOT_FOUND_TAG_ << 24), + true); + m_prefixLookup_ = new Hashtable(); + // uhash_open(prefixLookupHash, prefixLookupComp); + m_contractions_ = new ContractionTable(m_mapping_); + // copy UCA's maxexpansion and merge as we go along + m_maxExpansions_ = maxet; + // adding an extra initial value for easier manipulation + for (int i = 0; + i < RuleBasedCollator.UCA_.m_expansionEndCE_.length; i ++) { + maxet.m_endExpansionCE_.add(new Integer( + RuleBasedCollator.UCA_.m_expansionEndCE_[i])); + maxet.m_expansionCESize_.add(new Byte( + RuleBasedCollator.UCA_.m_expansionEndCEMaxSize_[i])); + } + m_maxJamoExpansions_ = maxjet; + + m_unsafeCP_ = new byte[UNSAFECP_TABLE_SIZE_]; + m_contrEndCP_ = new byte[UNSAFECP_TABLE_SIZE_]; 
+ Arrays.fill(m_unsafeCP_, (byte)0); + Arrays.fill(m_contrEndCP_, (byte)0); + } + + /** + * Duplicating a BuildTable. + * Not all data will be duplicated into their own object. + * @param table to clone + */ + BuildTable(BuildTable table) + { + m_collator_ = table.m_collator_; + m_mapping_ = new TrieBuilder.BuildTable(table.m_mapping_); + m_expansions_ = (Vector)table.m_expansions_.clone(); + m_contractions_ = new ContractionTable(table.m_contractions_); + m_contractions_.m_mapping_ = m_mapping_; + m_options_ = table.m_options_; + m_maxExpansions_ = new MaxExpansionTable(table.m_maxExpansions_); + m_maxJamoExpansions_ + = new MaxJamoExpansionTable(table.m_maxJamoExpansions_); + m_unsafeCP_ = new byte[table.m_unsafeCP_.length]; + System.arraycopy(table.m_unsafeCP_, 0, m_unsafeCP_, 0, + m_unsafeCP_.length); + m_contrEndCP_ = new byte[table.m_contrEndCP_.length]; + System.arraycopy(table.m_contrEndCP_, 0, m_contrEndCP_, 0, + m_contrEndCP_.length); + } + + // package private data members ------------------------------------- + + RuleBasedCollator m_collator_; + TrieBuilder.BuildTable m_mapping_; + Vector m_expansions_; + ContractionTable m_contractions_; + // UCATableHeader image; + CollationRuleParser.OptionSet m_options_; + MaxExpansionTable m_maxExpansions_; + MaxJamoExpansionTable m_maxJamoExpansions_; + byte m_unsafeCP_[]; + byte m_contrEndCP_[]; + Hashtable m_prefixLookup_; + }; + + private static class Elements + { + // package private data members ------------------------------------- + + String m_prefixChars_; + int m_prefix_; + String m_uchars_; + /** + * Working string + */ + String m_cPoints_; + /** + * Offset to the working string + */ + int m_cPointsOffset_; + /** + * These are collation elements - there could be more than one - in + * case of expansion + */ + int m_CEs_[]; + /** + * This is the value element maps in original table + */ + int m_mapCE_; + int m_sizePrim_[]; + int m_sizeSec_[]; + int m_sizeTer_[]; + boolean m_variableTop_; + boolean 
m_caseBit_; + boolean m_isThai_; + + // package private constructors ------------------------------------- + + /** + * Package private constructor + */ + Elements() + { + m_sizePrim_ = new int[128]; + m_sizeSec_ = new int[128]; + m_sizeTer_ = new int[128]; + } + + /** + * Package private constructor + */ + Elements(Elements element) + { + m_prefixChars_ = element.m_prefixChars_; + m_prefix_ = element.m_prefix_; + m_uchars_ = element.m_uchars_; + m_cPoints_ = element.m_cPoints_; + m_cPointsOffset_ = element.m_cPointsOffset_; + m_CEs_ = element.m_CEs_; + m_mapCE_ = element.m_mapCE_; + m_sizePrim_ = element.m_sizePrim_; + m_sizeSec_ = element.m_sizeSec_; + m_sizeTer_ = element.m_sizeTer_; + m_variableTop_ = element.m_variableTop_; + m_caseBit_ = element.m_caseBit_; + m_isThai_ = element.m_isThai_; + } + + // package private methods ------------------------------------------- + + /** + * Hashcode calculation for token + * @return the hashcode + */ + public int hashCode() + { + String str = m_cPoints_.substring(m_cPointsOffset_); + return str.hashCode(); + } + + /** + * Equals calculation + * @param target object to compare + * @return true if target is the same as this object + */ + public boolean equals(Object target) + { + if (target == this) { + return true; + } + if (target instanceof Elements) { + Elements t = (Elements)target; + int size = m_cPoints_.length() - m_cPointsOffset_; + if (size == t.m_cPoints_.length() - t.m_cPointsOffset_) { + return t.m_cPoints_.regionMatches(t.m_cPointsOffset_, + m_cPoints_, + m_cPointsOffset_, size); + } + } + return false; + } + }; + + // private data member --------------------------------------------------- + + /** + * Maximum strength used in CE building + */ + private static final int CE_BASIC_STRENGTH_LIMIT_ = 3; + /** + * Maximum collation strength + */ + private static final int CE_STRENGTH_LIMIT_ = 16; + /** + * Implicit ce minimum + */ + private static final int CE_PRIMARY_IMPLICIT_MIN_ = 0xE8000000; + private static 
final int CE_PRIMARY_IMPLICIT_MAX_ = 0xF0000000; + /** + * Strength mask array, used in inverse UCA + */ + private static final int STRENGTH_MASK_[] = {0xFFFF0000, 0xFFFFFF00, + 0xFFFFFFFF}; + /** + * CE tag for not found + */ + private static final int CE_NOT_FOUND_ = 0xF0000000; + /** + * CE tag for not found + */ + private static final int CE_NOT_FOUND_TAG_ = 0; + /** + * This code point results in an expansion + */ + private static final int CE_EXPANSION_TAG_ = 1; + /** + * Start of a contraction + */ + private static final int CE_CONTRACTION_TAG_ = 2; + /** + * Thai character - do the reordering + */ + private static final int CE_THAI_TAG_ = 3; + /** + * Charset processing, not yet implemented + */ + private static final int CE_CHARSET_TAG_ = 4; + /** + * Lead surrogate that is tailored and doesn't start a contraction + */ + private static final int CE_SURROGATE_TAG_ = 5; + /** + * AC00-D7AF + */ + private static final int CE_HANGUL_SYLLABLE_TAG_ = 6; + /** + * D800-DBFF + */ + private static final int CE_LEAD_SURROGATE_TAG_ = 7; + /** + * DC00-DFFF + */ + private static final int CE_TRAIL_SURROGATE_TAG_ = 8; + /** + * 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D + */ + private static final int CE_CJK_IMPLICIT_TAG_ = 9; + private static final int CE_IMPLICIT_TAG_ = 10; + private static final int CE_SPEC_PROC_TAG_ = 11; + /** + * This is a three byte primary with starting secondaries and tertiaries. + * It fits in a single 32 bit CE and is used instead of expansion to save + * space without affecting the performance (hopefully) + */ + private static final int CE_LONG_PRIMARY_TAG_ = 12; + /** + * Unsafe UChar hash table table size. Size is 32 bytes for 1 bit for each + * latin 1 char + some power of two for hashing the rest of the chars. + * Size in bytes + */ + private static final int UNSAFECP_TABLE_SIZE_ = 1056; + /** + * Mask value down to "some power of two" -1. Number of bits, not num of + * bytes. 
+ */ + private static final int UNSAFECP_TABLE_MASK_ = 0x1fff; + /** + * Case values + */ + private static final int UPPER_CASE_ = 0x80; + private static final int MIXED_CASE_ = 0x40; + private static final int LOWER_CASE_ = 0x00; + /** + * Initial table size + */ + private static final int INIT_TABLE_SIZE_ = 1028; + /** + * Header size, copied from ICU4C, to be changed when that value changes + */ + private static final int HEADER_SIZE_ = 0xC4; + /** + * Contraction table new element indicator + */ + private static final int CONTRACTION_TABLE_NEW_ELEMENT_ = 0xFFFFFF; + /** + * Parser for the rules + */ + private CollationRuleParser m_parser_; + /** + * Utility UCA collation element iterator + */ + private CollationElementIterator m_utilColEIter_; + + // private methods ------------------------------------------------------- + + /** + * @param listheader parsed rule tokens + * @exception Exception thrown when internal error occurs + */ + private void initBuffers(CollationRuleParser.TokenListHeader listheader) + throws Exception + { + CEGenerator gens[] = {new CEGenerator(), new CEGenerator(), + new CEGenerator()}; + int ceparts[] = new int[CE_BASIC_STRENGTH_LIMIT_]; + CollationRuleParser.Token token = listheader.m_last_; + int t[] = new int[CE_STRENGTH_LIMIT_]; + Arrays.fill(t, 0, CE_STRENGTH_LIMIT_, 0); + + token.m_toInsert_ = 1; + t[token.m_strength_] = 1; + + while (token.m_previous_ != null) { + if (token.m_previous_.m_strength_ < token.m_strength_) { + // going up + t[token.m_strength_] = 0; + t[token.m_previous_.m_strength_] ++; + } + else if (token.m_previous_.m_strength_ > token.m_strength_) { + // going down + t[token.m_previous_.m_strength_] = 1; + } + else { + t[token.m_strength_] ++; + } + token = token.m_previous_; + token.m_toInsert_ = t[token.m_strength_]; + } + + token.m_toInsert_ = t[token.m_strength_]; + INVERSE_UCA_.getInverseGapPositions(listheader); + + token = listheader.m_first_; + int fstrength = Collator.IDENTICAL; + int initstrength = 
Collator.IDENTICAL; + + ceparts[Collator.PRIMARY] = mergeCE(listheader.m_baseCE_, + listheader.m_baseContCE_, + Collator.PRIMARY); + ceparts[Collator.SECONDARY] = mergeCE(listheader.m_baseCE_, + listheader.m_baseContCE_, + Collator.SECONDARY); + ceparts[Collator.TERTIARY] = mergeCE(listheader.m_baseCE_, + listheader.m_baseContCE_, + Collator.TERTIARY); + while (token != null) { + fstrength = token.m_strength_; + if (fstrength < initstrength) { + initstrength = fstrength; + if (listheader.m_pos_[fstrength] == -1) { + while (listheader.m_pos_[fstrength] == -1 && fstrength > 0) + { + fstrength--; + } + if (listheader.m_pos_[fstrength] == -1) { + throw new Exception("Internal program error"); + } + } + if (initstrength == Collator.TERTIARY) { + // starting with tertiary + ceparts[Collator.PRIMARY] + = listheader.m_gapsLo_[fstrength * 3]; + ceparts[Collator.SECONDARY] + = listheader.m_gapsLo_[fstrength * 3 + 1]; + ceparts[Collator.TERTIARY] = getCEGenerator( + gens[Collator.TERTIARY], + listheader.m_gapsLo_, + listheader.m_gapsHi_, + token, fstrength); + } + else if (initstrength == Collator.SECONDARY) { + // secondaries + ceparts[Collator.PRIMARY] + = listheader.m_gapsLo_[fstrength * 3]; + ceparts[Collator.SECONDARY] + = getCEGenerator(gens[Collator.SECONDARY], + listheader.m_gapsLo_, + listheader.m_gapsHi_, + token, + fstrength); + ceparts[Collator.TERTIARY] = getSimpleCEGenerator( + gens[Collator.TERTIARY], + token, + Collator.TERTIARY); + } + else { + // primaries + ceparts[Collator.PRIMARY] = getCEGenerator( + gens[Collator.PRIMARY], + listheader.m_gapsLo_, + listheader.m_gapsHi_, + token, fstrength); + ceparts[Collator.SECONDARY] = getSimpleCEGenerator( + gens[Collator.SECONDARY], + token, + Collator.SECONDARY); + ceparts[Collator.TERTIARY] = getSimpleCEGenerator( + gens[Collator.TERTIARY], + token, + Collator.TERTIARY); + } + } + else { + if (token.m_strength_ == Collator.TERTIARY) { + ceparts[Collator.TERTIARY] = getNextGenerated( + gens[Collator.TERTIARY]); + } 
+ else if (token.m_strength_ == Collator.SECONDARY) { + ceparts[Collator.SECONDARY] = getNextGenerated( + gens[Collator.SECONDARY]); + ceparts[Collator.TERTIARY] = getSimpleCEGenerator( + gens[Collator.TERTIARY], + token, + Collator.TERTIARY); + } + else if (token.m_strength_ == Collator.PRIMARY) { + ceparts[Collator.PRIMARY] = getNextGenerated( + gens[Collator.PRIMARY]); + ceparts[Collator.SECONDARY] = getSimpleCEGenerator( + gens[Collator.SECONDARY], + token, + Collator.SECONDARY); + ceparts[Collator.TERTIARY] = getSimpleCEGenerator( + gens[Collator.TERTIARY], + token, + Collator.TERTIARY); + } + } + doCE(ceparts, token); + token = token.m_next_; + } + } + + /** + * Get the next generated ce + * @param g ce generator + * @return next generated ce + */ + private int getNextGenerated(CEGenerator g) + { + g.m_current_ = nextWeight(g); + return g.m_current_; + } + + /** + * @param g CEGenerator + * @param token rule token + * @param fstrength + * @return ce generator + * @exception Exception thrown when internal error occurs + */ + private int getSimpleCEGenerator(CEGenerator g, + CollationRuleParser.Token token, + int strength) throws Exception + { + int high, low, count = 1; + int maxbyte = (strength == Collator.TERTIARY) ? 
0x3F : 0xFF; + + if (strength == Collator.SECONDARY) { + low = RuleBasedCollator.COMMON_TOP_2_ << 24; + high = 0xFFFFFFFF; + count = 0xFF - RuleBasedCollator.COMMON_TOP_2_; + } + else { + low = RuleBasedCollator.BYTE_COMMON_ << 24; //0x05000000; + high = 0x40000000; + count = 0x40 - RuleBasedCollator.BYTE_COMMON_; + } + + if (token.m_next_ != null && token.m_next_.m_strength_ == strength) { + count = token.m_next_.m_toInsert_; + } + + g.m_rangesLength_ = allocateWeights(low, high, count, maxbyte, + g.m_ranges_); + g.m_current_ = RuleBasedCollator.BYTE_COMMON_ << 24; + + if (g.m_rangesLength_ == 0) { + throw new Exception("Internal program error"); + } + return g.m_current_; + } + + /** + * Combines 2 ce into one with respect to the argument strength + * @param ce1 first ce + * @param ce2 second ce + * @param strength strength to use + * @return combined ce + */ + private static int mergeCE(int ce1, int ce2, int strength) + { + int mask = RuleBasedCollator.CE_TERTIARY_MASK_; + if (strength == Collator.SECONDARY) { + mask = RuleBasedCollator.CE_SECONDARY_MASK_; + } + else if (strength == Collator.PRIMARY) { + mask = RuleBasedCollator.CE_PRIMARY_MASK_; + } + ce1 &= mask; + ce2 &= mask; + switch (strength) + { + case Collator.PRIMARY: + return ce1 | ce2 >> 16; + case Collator.SECONDARY: + return ce1 << 16 | ce2 << 8; + default: + return ce1 << 24 | ce2 << 16; + } + } + + /** + * @param g CEGenerator + * @param lows low gap array + * @param highs high gap array + * @param token rule token + * @param fstrength + * @exception Exception thrown when internal error occurs + */ + private int getCEGenerator(CEGenerator g, int lows[], int highs[], + CollationRuleParser.Token token, int fstrength) + throws Exception + { + int strength = token.m_strength_; + int low = lows[fstrength * 3 + strength]; + int high = highs[fstrength * 3 + strength]; + int maxbyte = (strength == Collator.TERTIARY) ? 
0x3F : 0xFF; + + int count = token.m_toInsert_; + + if (low >= high && strength > Collator.PRIMARY) { + int s = strength; + while (true) { + s --; + if (lows[fstrength * 3 + s] != highs[fstrength * 3 + s]) { + if (strength == Collator.SECONDARY) { + low = RuleBasedCollator.COMMON_TOP_2_ << 24; + high = 0xFFFFFFFF; + } + else { + // low = 0x02000000; + // This needs to be checked - what if low is + // not good... + high = 0x40000000; + } + break; + } + if (s < 0) { + throw new Exception("Internal program error"); + } + } + } + if (low == 0) { + low = 0x01000000; + } + if (strength == Collator.SECONDARY) { // similar as simple + if (low >= (RuleBasedCollator.COMMON_BOTTOM_2_ <<24) + && low < (RuleBasedCollator.COMMON_TOP_2_ << 24)) { + low = RuleBasedCollator.COMMON_TOP_2_ << 24; + } + if (high > (RuleBasedCollator.COMMON_BOTTOM_2_ << 24) + && high < (RuleBasedCollator.COMMON_TOP_2_ << 24)) { + high = RuleBasedCollator.COMMON_TOP_2_ << 24; + } + if (low < (RuleBasedCollator.COMMON_BOTTOM_2_ << 24)) { + g.m_rangesLength_ = allocateWeights( + RuleBasedCollator.COMMON_TOP_2_ << 24, + high, count, maxbyte, g.m_ranges_); + g.m_current_ = RuleBasedCollator.COMMON_BOTTOM_2_; + return g.m_current_; + } + } + + g.m_rangesLength_ = allocateWeights(low, high, count, maxbyte, + g.m_ranges_); + if (g.m_rangesLength_ == 0) { + throw new Exception("Internal program error"); + } + g.m_current_ = nextWeight(g); + return g.m_current_; + } + + /** + * @param ceparts list of collation elements parts + * @param token rule token + */ + private void doCE(int ceparts[], CollationRuleParser.Token token) + { + // this one makes the table and stuff + int noofbytes[] = new int[3]; + for (int i = 0; i < 3; i ++) { + noofbytes[i] = countBytes(ceparts[i]); + } + + // Here we have to pack CEs from parts + int cei = 0; + int value = 0; + + while ((cei << 1) < noofbytes[0] || cei < noofbytes[1] + || ceiRuleBasedCollator constructor that takes the rules. 
+ * Please see RuleBasedCollator class description for more details on the + * collation rule syntax.
+ * @see java.util.Locale + * @param rules the collation rules to build the collation table from. + * @exception ParseException thrown when argument rules have an invalid + * syntax. + * @draft 2.2 + */ + CollationRuleParser(String rules) throws ParseException + { + m_rules_ = Normalizer.decompose(rules, false); + m_source_ = new StringBuffer(m_rules_); + m_current_ = 0; + m_extra_ = new StringBuffer(); + m_extraCurrent_ = m_source_.length(); + m_variableTop_ = null; + m_parsedToken_ = new ParsedToken(); + m_hashTable_ = new Hashtable(); + m_options_ = new OptionSet(RuleBasedCollator.UCA_); + m_listHeader_ = new TokenListHeader[512]; + m_resultLength_ = 0; + assembleTokenList(); + } + + // package private inner classes ----------------------------------------- + + /** + * Collation options set + */ + static class OptionSet + { + // package private constructor --------------------------------------- + + /** + * Initializes the option set with the argument collators + * @param collator option to use + */ + OptionSet(RuleBasedCollator collator) + { + m_variableTopValue_ = collator.m_variableTopValue_; + m_isFrenchCollation_ = collator.isFrenchCollation(); + m_isAlternateHandlingShifted_ = collator.isAlternateHandling(true); + m_caseFirst_ = collator.m_caseFirst_; + m_isCaseLevel_ = collator.isCaseLevel(); + m_decomposition_ = collator.getDecomposition(); + m_strength_ = collator.getStrength(); + m_isHiragana4_ = collator.m_isHiragana4_; + } + + // package private data members -------------------------------------- + + int m_variableTopValue_; + boolean m_isFrenchCollation_; + /** + * Attribute for handling variable elements + */ + boolean m_isAlternateHandlingShifted_; + /** + * who goes first, lower case or uppercase + */ + int m_caseFirst_; + /** + * do we have an extra case level + */ + boolean m_isCaseLevel_; + /** + * attribute for normalization + */ + int m_decomposition_; + /** + * attribute for strength + */ + int m_strength_; + /** + * attribute for special 
Hiragana + */ + boolean m_isHiragana4_; + }; + + /** + * List of tokens used by the collation rules + */ + static class TokenListHeader + { + Token m_first_; + Token m_last_; + Token m_reset_; + boolean m_indirect_; + int m_baseCE_; + int m_baseContCE_; + int m_nextCE_; + int m_nextContCE_; + int m_previousCE_; + int m_previousContCE_; + int m_pos_[] = new int[Collator.IDENTICAL + 1]; + int m_gapsLo_[] = new int[3 * (Collator.TERTIARY + 1)]; + int m_gapsHi_[] = new int[3 * (Collator.TERTIARY + 1)]; + int m_numStr_[] = new int[3 * (Collator.TERTIARY + 1)]; + Token m_fStrToken_[] = new Token[Collator.TERTIARY + 1]; + Token m_lStrToken_[] = new Token[Collator.TERTIARY + 1]; + }; + + /** + * Token wrapper for collation rules + */ + static class Token + { + // package private data members --------------------------------------- + + int m_CE_[]; + int m_CELength_; + int m_expCE_[]; + int m_expCELength_; + int m_source_; + int m_expansion_; + int m_prefix_; + int m_strength_; + int m_toInsert_; + int m_polarity_; // 1 for <, <<, <<<, , ; and 0 for >, >>, >>> + TokenListHeader m_listHeader_; + Token m_previous_; + Token m_next_; + String m_rules_; + + // package private constructors --------------------------------------- + + Token() + { + m_CE_ = new int[128]; + m_expCE_ = new int[128]; + // TODO: this should also handle reverse + m_polarity_ = TOKEN_POLARITY_POSITIVE_; + m_next_ = null; + m_previous_ = null; + m_CELength_ = 0; + m_expCELength_ = 0; + } + + // package private methods -------------------------------------------- + + /** + * Hashcode calculation for token + * @return the hashcode + */ + public int hashCode() + { + int result = 0; + int len = (m_source_ & 0xFF000000) >> 24; + int inc = ((len - 32) / 32) + 1; + + int start = m_source_ & 0x00FFFFFF; + int limit = start + len; + + while (start < limit) { + result = (result * 37) + m_rules_.charAt(start); + start += inc; + } + return result; + } + + /** + * Equals calculation + * @param target object to compare 
+ * @return true if target is the same as this object + */ + public boolean equals(Object target) + { + if (target == this) { + return true; + } + if (target instanceof Token) { + Token t = (Token)target; + int sstart = m_source_ & 0x00FFFFFF; + int tstart = t.m_source_ & 0x00FFFFFF; + int slimit = (m_source_ & 0xFF000000) >> 24; + int tlimit = (m_source_ & 0xFF000000) >> 24; + + int end = sstart + slimit - 1; + + if (m_source_ == 0 || t.m_source_ == 0) { + return false; + } + if (slimit != tlimit) { + return false; + } + if (m_source_ == t.m_source_) { + return true; + } + while (sstart < end + && m_rules_.charAt(sstart) == t.m_rules_.charAt(tstart)) + { + ++ sstart; + ++ tstart; + } + if (m_rules_.charAt(sstart) == t.m_rules_.charAt(tstart)) { + return true; + } + } + return false; + } + }; + + // package private data member ------------------------------------------- + + /** + * Indicator that the token is resetted yet, ie & in the rules + */ + static final int TOKEN_RESET_ = 0xDEADBEEF; + + /** + * Size of the number of tokens + */ + int m_resultLength_; + /** + * List of parsed tokens + */ + TokenListHeader m_listHeader_[]; + /** + * Variable top token + */ + Token m_variableTop_; + /** + * Collation options + */ + OptionSet m_options_; + /** + * Normalized collation rules with some extra characters + */ + StringBuffer m_source_; + /** + * Hash table to keep all tokens + */ + Hashtable m_hashTable_; + + // package private method ------------------------------------------------ + + void setDefaultOptionsInCollator(RuleBasedCollator collator) + { + collator.m_defaultStrength_ = m_options_.m_strength_; + collator.m_defaultDecomposition_ = m_options_.m_decomposition_; + collator.m_defaultIsFrenchCollation_ = m_options_.m_isFrenchCollation_; + collator.m_defaultIsAlternateHandlingShifted_ + = m_options_.m_isAlternateHandlingShifted_; + collator.m_defaultIsCaseLevel_ = m_options_.m_isCaseLevel_; + collator.m_defaultCaseFirst_ = m_options_.m_caseFirst_; + 
collator.m_defaultIsHiragana4_ = m_options_.m_isHiragana4_; + } + + // private inner classes ------------------------------------------------- + + /** + * This is a token that has been parsed but not yet processed. Used to + * reduce the number of arguments in the parser + */ + private static class ParsedToken + { + // private constructor ---------------------------------------------- + + /** + * Empty constructor + */ + ParsedToken() + { + m_charsLen_ = 0; + m_charsOffset_ = 0; + m_extensionLen_ = 0; + m_extensionOffset_ = 0; + m_prefixLen_ = 0; + m_prefixOffset_ = 0; + m_flags_ = 0; + m_strength_ = TOKEN_UNSET_; + } + + // private data members --------------------------------------------- + + int m_strength_; + int m_charsOffset_; + int m_charsLen_; + int m_extensionOffset_; + int m_extensionLen_; + int m_prefixOffset_; + int m_prefixLen_; + char m_flags_; + char m_indirectIndex_; + }; + + /** + * Boundary wrappers + */ + private static class IndirectBoundaries + { + // package private constructor --------------------------------------- + + IndirectBoundaries(int startce, int startcontce, int limitce, + int limitcontce) + { + m_startCE_ = startce; + m_startContCE_ = startcontce; + m_limitCE_ = limitce; + m_limitContCE_ = limitcontce; + } + + // package private data members -------------------------------------- + + int m_startCE_; + int m_startContCE_; + int m_limitCE_; + int m_limitContCE_; + }; + + /** + * Collation option rule tag + */ + private static class TokenOption + { + // package private constructor --------------------------------------- + + TokenOption(String name, int attribute, String suboptions[], + int suboptionattributevalue[]) + { + m_name_ = name; + m_attribute_ = attribute; + m_subOptions_ = suboptions; + m_subOptionAttributeValues_ = suboptionattributevalue; + } + + // package private data member --------------------------------------- + + private String m_name_; + private int m_attribute_; + private String m_subOptions_[]; + private int 
m_subOptionAttributeValues_[]; + }; + + // private variables ----------------------------------------------------- + + /** + * Current parsed token + */ + private ParsedToken m_parsedToken_; + /** + * Collation rule + */ + private String m_rules_; + private int m_current_; + /** + * Current offset in m_source + */ + private int m_sourceLimit_; + /** + * Extra characters to keep during expansion + */ + private StringBuffer m_extra_; + /** + * Offset to m_extra_ + */ + private int m_extraCurrent_; + /** + * This is space for the extra strings that need to be unquoted during the + * parsing of the rules + */ + private static final int TOKEN_EXTRA_RULE_SPACE_SIZE_ = 2048; + /** + * Indicator that the token is not set yet + */ + private static final int TOKEN_UNSET_ = 0xFFFFFFFF; + /** + * Indicator that the rule is in the > polarity, ie everything on the + * right of the rule is less than + */ + private static final int TOKEN_POLARITY_NEGATIVE_ = 0; + /** + * Indicator that the rule is in the < polarity, ie everything on the + * right of the rule is greater than + */ + private static final int TOKEN_POLARITY_POSITIVE_ = 1; + /** + * Flag mask to determine if top is set + */ + private static final int TOKEN_TOP_MASK_ = 0x04; + /** + * Flag mask to determine if variable top is set + */ + private static final int TOKEN_VARIABLE_TOP_MASK_ = 0x08; + /** + * Flag mask to determine if a before attribute is set + */ + private static final int TOKEN_BEFORE_ = 0x03; + /** + * For use in parsing token options + */ + private static final int TOKEN_SUCCESS_MASK_ = 0x10; + + /** + * Tailoring reset top value + */ + private static final int RESET_TOP_VALUE_ = 0x9F000303; + /** + * Tailoring next top value + */ + private static final int NEXT_TOP_VALUE_ = 0xE8960303; + /** + * First primary ignorable ce + */ + private static final int FIRST_PRIMARY_IGNORABLE_ = 0x00008705; + /** + * Last primary ignorable ce + */ + private static final int LAST_PRIMARY_IGNORABLE_ = 0x0000DD05; + /** + 
* Last primary ignorable continuation ce + */ + private static final int LAST_PRIMARY_IGNORABLE_CONT_ = 0x0000C1C0; + /** + * First secondary ignorable ce + */ + private static final int FIRST_SECONDARY_IGNORABLE_ = 0x00000000; + /** + * Last secondary ignorable ce + */ + private static final int LAST_SECONDARY_IGNORABLE_ = 0x00000500; + /** + * First tertiary ignorable ce + */ + private static final int FIRST_TERTIARY_IGNORABLE_ = 0x00000000; + /** + * Last tertiary ignorable ce + */ + private static final int LAST_TERTIARY_IGNORABLE_ = 0x00000000; + /** + * First variable ce + */ + private static final int FIRST_VARIABLE_ = 0x05070505; + /** + * Last variable ce + */ + private static final int LAST_VARIABLE_ = 0x13CF0505; + /** + * First non variable ce + */ + private static final int FIRST_NON_VARIABLE_ = 0x16200505; + /** + * Last non variable ce + */ + private static final int LAST_NON_VARIABLE_ = 0x767C0505; + + /** + * These values are used for finding CE values for indirect positioning. + * Indirect positioning is a mechanism for allowing resets on symbolic + * values. It only works for resets and you cannot tailor indirect names. + * An indirect name can define either an anchor point or a range. An anchor + * point behaves in exactly the same way as a code point in reset would, + * except that it cannot be tailored. A range (we currently only know for + * the [top] range will explicitly set the upper bound for generated CEs, + * thus allowing for better control over how many CEs can be squeezed + * between in the range without performance penalty. In that respect, we use + * [top] for tailoring of locales that use CJK characters. Other indirect + * values are currently a pure convenience, they can be used to assure that + * the CEs will be always positioned in the same place relative to a point + * with known properties (e.g. first primary ignorable). 
+ */ + private static final IndirectBoundaries INDIRECT_BOUNDARIES_[] = { + new IndirectBoundaries(RESET_TOP_VALUE_, 0, NEXT_TOP_VALUE_, 0), + new IndirectBoundaries(FIRST_PRIMARY_IGNORABLE_, 0, 0, 0), + new IndirectBoundaries(LAST_PRIMARY_IGNORABLE_, + LAST_PRIMARY_IGNORABLE_CONT_, 0, 0), + new IndirectBoundaries(FIRST_SECONDARY_IGNORABLE_, 0, 0, 0), + new IndirectBoundaries(LAST_SECONDARY_IGNORABLE_, 0, 0, 0), + new IndirectBoundaries(FIRST_TERTIARY_IGNORABLE_, 0, 0, 0), + new IndirectBoundaries(LAST_TERTIARY_IGNORABLE_, 0, 0, 0), + new IndirectBoundaries(FIRST_VARIABLE_, 0, 0, 0), + new IndirectBoundaries(LAST_VARIABLE_, 0, 0, 0), + new IndirectBoundaries(FIRST_NON_VARIABLE_, 0, 0, 0), + new IndirectBoundaries(LAST_NON_VARIABLE_, 0, 0, 0), + }; + + /** + * Inverse UCA constants + */ + private static final int INVERSE_SIZE_MASK_ = 0xFFF00000; + private static final int INVERSE_OFFSET_MASK_ = 0x000FFFFF; + private static final int INVERSE_SHIFT_VALUE_ = 20; + + /** + * Collation option tags + * [last variable] last variable value + * [last primary ignorable] largest CE for primary ignorable + * [last secondary ignorable] largest CE for secondary ignorable + * [last tertiary ignorable] largest CE for tertiary ignorable + * [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8) + */ + private static final TokenOption RULES_OPTIONS_[]; + + static + { + RULES_OPTIONS_ = new TokenOption[17]; + String option[] = {"non-ignorable", "shifted"}; + int value[] = {RuleBasedCollator.AttributeValue.NON_IGNORABLE_, + RuleBasedCollator.AttributeValue.SHIFTED_}; + RULES_OPTIONS_[0] = new TokenOption("alternate", + RuleBasedCollator.Attribute.ALTERNATE_HANDLING_, + option, value); + option = new String[1]; + option[0] = "2"; + value = new int[1]; + value[0] = RuleBasedCollator.AttributeValue.ON_; + RULES_OPTIONS_[1] = new TokenOption("backwards", + RuleBasedCollator.Attribute.FRENCH_COLLATION_, + option, value); + String offonoption[] = new String[2]; + 
offonoption[0] = "off";
+        offonoption[1] = "on";
+        int offonvalue[] = new int[2];
+        offonvalue[0] = RuleBasedCollator.AttributeValue.OFF_;
+        offonvalue[1] = RuleBasedCollator.AttributeValue.ON_;
+        RULES_OPTIONS_[2] = new TokenOption("caseLevel",
+                              RuleBasedCollator.Attribute.CASE_LEVEL_,
+                              offonoption, offonvalue);
+        option = new String[3];
+        option[0] = "lower";
+        option[1] = "upper";
+        // Bug fix: "off" belongs in option[2]. The old code assigned
+        // option[1] twice, clobbering "upper" and leaving option[2] null,
+        // so neither [caseFirst upper] nor [caseFirst off] could be parsed
+        // correctly (value[1]/value[2] below expect upper/off in order).
+        option[2] = "off";
+        value = new int[3];
+        value[0] = RuleBasedCollator.AttributeValue.LOWER_FIRST_;
+        value[1] = RuleBasedCollator.AttributeValue.UPPER_FIRST_;
+        value[2] = RuleBasedCollator.AttributeValue.OFF_;
+        RULES_OPTIONS_[3] = new TokenOption("caseFirst",
+                              RuleBasedCollator.Attribute.CASE_FIRST_,
+                              option, value);
+        RULES_OPTIONS_[4] = new TokenOption("normalization",
+                              RuleBasedCollator.Attribute.NORMALIZATION_MODE_,
+                              offonoption, offonvalue);
+        RULES_OPTIONS_[5] = new TokenOption("hiraganaQ",
+                        RuleBasedCollator.Attribute.HIRAGANA_QUATERNARY_MODE_,
+                              offonoption, offonvalue);
+        option = new String[5];
+        option[0] = "1";
+        option[1] = "2";
+        option[2] = "3";
+        option[3] = "4";
+        option[4] = "I";
+        value = new int[5];
+        value[0] = RuleBasedCollator.AttributeValue.PRIMARY_;
+        value[1] = RuleBasedCollator.AttributeValue.SECONDARY_;
+        value[2] = RuleBasedCollator.AttributeValue.TERTIARY_;
+        value[3] = RuleBasedCollator.AttributeValue.QUATERNARY_;
+        value[4] = RuleBasedCollator.AttributeValue.IDENTICAL_;
+        RULES_OPTIONS_[6] = new TokenOption("strength",
+                              RuleBasedCollator.Attribute.STRENGTH_,
+                              option, value);
+        RULES_OPTIONS_[7] = new TokenOption("variable top",
+                              RuleBasedCollator.Attribute.LIMIT_,
+                              null, null);
+        RULES_OPTIONS_[8] = new TokenOption("rearrange",
+                              RuleBasedCollator.Attribute.LIMIT_,
+                              null, null);
+        option = new String[3];
+        option[0] = "1";
+        option[1] = "2";
+        option[2] = "3";
+        value = new int[3];
+        value[0] = RuleBasedCollator.AttributeValue.PRIMARY_;
+        value[1] = RuleBasedCollator.AttributeValue.SECONDARY_;
+        value[2] = 
RuleBasedCollator.AttributeValue.TERTIARY_; + RULES_OPTIONS_[9] = new TokenOption("before", + RuleBasedCollator.Attribute.LIMIT_, + option, value); + RULES_OPTIONS_[10] = new TokenOption("top", + RuleBasedCollator.Attribute.LIMIT_, + null, null); + String firstlastoption[] = new String[5]; + firstlastoption[0] = "primary"; + firstlastoption[1] = "secondary"; + firstlastoption[2] = "tertiary"; + firstlastoption[3] = "variable"; + firstlastoption[4] = "non-ignorable"; + int firstlastvalue[] = new int[5]; + firstlastvalue[0] = RuleBasedCollator.AttributeValue.PRIMARY_; + firstlastvalue[1] = RuleBasedCollator.AttributeValue.PRIMARY_; + firstlastvalue[2] = RuleBasedCollator.AttributeValue.PRIMARY_; + firstlastvalue[3] = RuleBasedCollator.AttributeValue.PRIMARY_; + firstlastvalue[4] = RuleBasedCollator.AttributeValue.PRIMARY_; + RULES_OPTIONS_[11] = new TokenOption("first", + RuleBasedCollator.Attribute.LIMIT_, + firstlastoption, firstlastvalue); + RULES_OPTIONS_[12] = new TokenOption("last", + RuleBasedCollator.Attribute.LIMIT_, + firstlastoption, firstlastvalue); + RULES_OPTIONS_[13] = new TokenOption("undefined", + RuleBasedCollator.Attribute.LIMIT_, + null, null); + RULES_OPTIONS_[14] = new TokenOption("scriptOrder", + RuleBasedCollator.Attribute.LIMIT_, + null, null); + RULES_OPTIONS_[15] = new TokenOption("charsetname", + RuleBasedCollator.Attribute.LIMIT_, + null, null); + RULES_OPTIONS_[16] = new TokenOption("charset", + RuleBasedCollator.Attribute.LIMIT_, + null, null); + }; + + // private methods ------------------------------------------------------- + + /** + * Assembles the token list + * @param + * @exception ParseException thrown when rules syntax fails + */ + private int assembleTokenList() throws ParseException + { + Token lastToken = null; + int parseendoffset = -1; + m_parsedToken_.m_strength_ = TOKEN_UNSET_; + int sourcelimit = m_source_.length(); + int expandNext = 0; + + while (m_current_ < sourcelimit) { + m_parsedToken_.m_prefixOffset_ = 0; + // 
synwee todo + parseendoffset = parseNextToken(lastToken == null); + char specs = m_parsedToken_.m_flags_; + boolean variableTop = ((specs & TOKEN_VARIABLE_TOP_MASK_) != 0); + boolean top = ((specs & TOKEN_TOP_MASK_) != 0); + int lastStrength = TOKEN_UNSET_; + if (lastToken != null) { + lastStrength = lastToken.m_strength_; + } + Token key = new Token(); + key.m_source_ = m_parsedToken_.m_charsLen_ << 24 + | m_parsedToken_.m_charsOffset_; + key.m_rules_ = m_rules_; + // 4 Lookup each source in the CharsToToken map, and find a + // sourcetoken + Token sourceToken = (Token)m_hashTable_.get(key); + if (m_parsedToken_.m_strength_ != TOKEN_RESET_) { + if (lastToken == null) { + // this means that rules haven't started properly + throwParseException(m_source_.toString(), 0); + } + // 6 Otherwise (when relation != reset) + if (sourceToken == null) { + // If sourceToken is null, create new one + sourceToken = new Token(); + sourceToken.m_rules_ = m_source_.toString(); + sourceToken.m_source_ = m_parsedToken_.m_charsLen_ << 24 + | m_parsedToken_.m_charsOffset_; + sourceToken.m_prefix_ = m_parsedToken_.m_prefixLen_ << 24 + | m_parsedToken_.m_prefixOffset_; + // TODO: this should also handle reverse + sourceToken.m_polarity_ = TOKEN_POLARITY_POSITIVE_; + sourceToken.m_next_ = null; + sourceToken.m_previous_ = null; + sourceToken.m_CELength_ = 0; + sourceToken.m_expCELength_ = 0; + m_hashTable_.put(sourceToken, sourceToken); + } + else { + // we could have fished out a reset here + if (sourceToken.m_strength_ != TOKEN_RESET_ + && lastToken != sourceToken) { + // otherwise remove sourceToken from where it was. 
+ if (sourceToken.m_next_ != null) { + if (sourceToken.m_next_.m_strength_ + > sourceToken.m_strength_) { + sourceToken.m_next_.m_strength_ + = sourceToken.m_strength_; + } + sourceToken.m_next_.m_previous_ + = sourceToken.m_previous_; + } + else { + sourceToken.m_listHeader_.m_last_ + = sourceToken.m_previous_; + } + if (sourceToken.m_previous_ != null) { + sourceToken.m_previous_.m_next_ + = sourceToken.m_next_; + } + else { + sourceToken.m_listHeader_.m_first_ + = sourceToken.m_next_; + } + sourceToken.m_next_ = null; + sourceToken.m_previous_ = null; + } + } + sourceToken.m_strength_ = m_parsedToken_.m_strength_; + sourceToken.m_listHeader_ = lastToken.m_listHeader_; + + // 1. Find the strongest strength in each list, and set + // strongestP and strongestN accordingly in the headers. + if (lastStrength == TOKEN_RESET_ + || sourceToken.m_listHeader_.m_first_ == null) { + // If LAST is a reset insert sourceToken in the list. + if (sourceToken.m_listHeader_.m_first_ == null) { + sourceToken.m_listHeader_.m_first_ = sourceToken; + sourceToken.m_listHeader_.m_last_ = sourceToken; + } + else { // we need to find a place for us + // and we'll get in front of the same strength + if (sourceToken.m_listHeader_.m_first_.m_strength_ + <= sourceToken.m_strength_) { + sourceToken.m_next_ + = sourceToken.m_listHeader_.m_first_; + sourceToken.m_next_.m_previous_ = sourceToken; + sourceToken.m_listHeader_.m_first_ = sourceToken; + sourceToken.m_previous_ = null; + } + else { + lastToken = sourceToken.m_listHeader_.m_first_; + while (lastToken.m_next_ != null + && lastToken.m_next_.m_strength_ + > sourceToken.m_strength_) { + lastToken = lastToken.m_next_; + } + if (lastToken.m_next_ != null) { + lastToken.m_next_.m_previous_ = sourceToken; + } + else { + sourceToken.m_listHeader_.m_last_ + = sourceToken; + } + sourceToken.m_previous_ = lastToken; + sourceToken.m_next_ = lastToken.m_next_; + lastToken.m_next_ = sourceToken; + } + } + } + else { + // Otherwise (when LAST is not a 
reset) + // if polarity (LAST) == polarity(relation), insert + // sourceToken after LAST, otherwise insert before. + // when inserting after or before, search to the next + // position with the same strength in that direction. + // (This is called postpone insertion). + if (sourceToken != lastToken) { + if (lastToken.m_polarity_ == sourceToken.m_polarity_) { + while (lastToken.m_next_ != null + && lastToken.m_next_.m_strength_ + > sourceToken.m_strength_) { + lastToken = lastToken.m_next_; + } + sourceToken.m_previous_ = lastToken; + if (lastToken.m_next_ != null) { + lastToken.m_next_.m_previous_ = sourceToken; + } + else { + sourceToken.m_listHeader_.m_last_ = sourceToken; + } + sourceToken.m_next_ = lastToken.m_next_; + lastToken.m_next_ = sourceToken; + } + else { + while (lastToken.m_previous_ != null + && lastToken.m_previous_.m_strength_ + > sourceToken.m_strength_) { + lastToken = lastToken.m_previous_; + } + sourceToken.m_next_ = lastToken; + if (lastToken.m_previous_ != null) { + lastToken.m_previous_.m_next_ = sourceToken; + } + else { + sourceToken.m_listHeader_.m_first_ + = sourceToken; + } + sourceToken.m_previous_ = lastToken.m_previous_; + lastToken.m_previous_ = sourceToken; + } + } + else { // repeated one thing twice in rules, stay with the + // stronger strength + if (lastStrength < sourceToken.m_strength_) { + sourceToken.m_strength_ = lastStrength; + } + } + } + // if the token was a variable top, we're gonna put it in + if (variableTop == true && m_variableTop_ == null) { + variableTop = false; + m_variableTop_ = sourceToken; + } + // Treat the expansions. + // There are two types of expansions: explicit (x / y) and + // reset based propagating expansions + // (&abc * d * e <=> &ab * d / c * e / c) + // if both of them are in effect for a token, they are combined. + sourceToken.m_expansion_ = m_parsedToken_.m_extensionLen_ << 24 + | m_parsedToken_.m_extensionOffset_; + if (expandNext != 0) { + if (sourceToken.m_strength_ == RuleBasedCollator.PRIMARY) { + // primary strength kills off the implicit expansion + expandNext = 0; + } + else if (sourceToken.m_expansion_ == 0) { + // if there is no expansion, implicit is just added to + // the token + sourceToken.m_expansion_ = expandNext; + } + else { + // there is both explicit and implicit expansion. + // We need to make a combination + m_extra_.delete(0, m_extra_.length()); + int start = expandNext & 0xFFFFFF; + m_extra_.append(m_source_.substring(start, + start + (expandNext >>> 24))); + start = m_parsedToken_.m_extensionOffset_; + m_extra_.append(m_source_.substring(start, + start + m_parsedToken_.m_extensionLen_)); + sourceToken.m_expansion_ = ((expandNext >>> 24) + + m_parsedToken_.m_extensionLen_) << 24 + | m_extraCurrent_; + m_extraCurrent_ += (expandNext >>> 24) + + m_parsedToken_.m_extensionLen_; + } + } + } + else { + if (lastToken != null && lastStrength == TOKEN_RESET_) { + // if the previous token was also a reset, this means that + // we have two consecutive resets and we want to remove the + // previous one if empty + if (m_listHeader_[m_resultLength_ - 1].m_first_ == null) { + m_resultLength_ --; + } + } + if (sourceToken == null) { + // this is a reset, but it might still be somewhere in the + // tailoring, in shorter form + int searchCharsLen = m_parsedToken_.m_charsLen_; + while (searchCharsLen > 1 && sourceToken == null) { + searchCharsLen --; + // key = searchCharsLen << 24 | charsOffset; + Token tokenkey = new Token(); + tokenkey.m_source_ = searchCharsLen << 24 + | m_parsedToken_.m_charsOffset_; + tokenkey.m_rules_ = m_source_.toString(); + sourceToken = (Token)m_hashTable_.get(tokenkey); + } + if (sourceToken != null) { + expandNext = (m_parsedToken_.m_charsLen_ + - searchCharsLen) << 24 + | (m_parsedToken_.m_charsOffset_ + + searchCharsLen); + } + } + if 
((specs & TOKEN_BEFORE_) != 0 && top == false) { + // we're doing before & there is no indirection + int strength = (specs & TOKEN_BEFORE_) - 1; + if (sourceToken != null + && sourceToken.m_strength_ != TOKEN_RESET_) { + // this is a before that is already ordered in the UCA + // - so we need to get the previous with good strength + while (sourceToken.m_strength_ > strength + && sourceToken.m_previous_ != null) { + sourceToken = sourceToken.m_previous_; + } + // here, either we hit the strength or NULL + if (sourceToken.m_strength_ == strength) { + if (sourceToken.m_previous_ != null) { + sourceToken = sourceToken.m_previous_; + } + else { // start of list + sourceToken + = sourceToken.m_listHeader_.m_reset_; + } + } + else { // we hit NULL, we should be doing the else part + sourceToken = sourceToken.m_listHeader_.m_reset_; + sourceToken = getVirginBefore(sourceToken, + strength); + } + } + else { + sourceToken = getVirginBefore(sourceToken, strength); + } + } + // 5 If the relation is a reset: + // If sourceToken is null + // Create new list, create new sourceToken, make the baseCE + // from source, put the sourceToken in ListHeader of the new + // list + if (sourceToken == null) { + m_listHeader_[m_resultLength_] = new TokenListHeader(); + // 3 Consider each item: relation, source, and expansion: + // e.g. ...< x / y ... + // First convert all expansions into normal form. + // Examples: + // If "xy" doesn't occur earlier in the list or in the UCA, + // convert &xy * c * d * ... into &x * c/y * d * ... + // Note: reset values can never have expansions, although + // they can cause the very next item to have one. They may + // be contractions, if they are found earlier in the list. 
+ if (top == false) { + CollationElementIterator coleiter + = RuleBasedCollator.UCA_.getCollationElementIterator( + m_source_.substring(m_parsedToken_.m_charsOffset_, + m_parsedToken_.m_charsOffset_ + + m_parsedToken_.m_charsLen_)); + + int CE = coleiter.next(); + int expand = coleiter.getOffset(); + int SecondCE = coleiter.next(); + + m_listHeader_[m_resultLength_].m_baseCE_ + = CE & 0xFFFFFF3F; + if (RuleBasedCollator.isContinuation(SecondCE)) { + m_listHeader_[m_resultLength_].m_baseContCE_ + = SecondCE; + } + else { + m_listHeader_[m_resultLength_].m_baseContCE_ = 0; + } + m_listHeader_[m_resultLength_].m_nextCE_ = 0; + m_listHeader_[m_resultLength_].m_nextContCE_ = 0; + m_listHeader_[m_resultLength_].m_previousCE_ = 0; + m_listHeader_[m_resultLength_].m_previousContCE_ = 0; + m_listHeader_[m_resultLength_].m_indirect_ = false; + sourceToken = new Token(); + expandNext = initAReset(expand, sourceToken); + } + else { // top == TRUE + top = false; + m_listHeader_[m_resultLength_].m_previousCE_ = 0; + m_listHeader_[m_resultLength_].m_previousContCE_ = 0; + m_listHeader_[m_resultLength_].m_indirect_ = true; + IndirectBoundaries ib = INDIRECT_BOUNDARIES_[ + m_parsedToken_.m_indirectIndex_]; + if ((specs & TOKEN_BEFORE_) == 0) { + // indirect without before, just use the supplied + // values + m_listHeader_[m_resultLength_].m_baseCE_ + = ib.m_startCE_; + m_listHeader_[m_resultLength_].m_baseContCE_ + = ib.m_startContCE_; + m_listHeader_[m_resultLength_].m_nextCE_ + = ib.m_limitCE_; + m_listHeader_[m_resultLength_].m_nextContCE_ + = ib.m_limitContCE_; + } + else { // there was a before + // we need to do slightly more work. we need to get + // the baseCE using the inverse UCA & getPrevious. 
+ // The next bound is not set, and will be decided + // in ucol_bld + int strength = (specs & TOKEN_BEFORE_) - 1; + int baseCE = ib.m_startCE_; + int baseContCE = ib.m_startContCE_;//&0xFFFFFF3F; + CollationParsedRuleBuilder.InverseUCA invuca + = CollationParsedRuleBuilder.INVERSE_UCA_; + int ce[] = {0, 0, 0}; + invuca.getInversePrevCE(baseCE, baseContCE, + strength, ce); + m_listHeader_[m_resultLength_].m_baseCE_ = ce[1]; + m_listHeader_[m_resultLength_].m_baseContCE_ + = ce[2]; + m_listHeader_[m_resultLength_].m_nextCE_ = 0; + m_listHeader_[m_resultLength_].m_nextContCE_ = 0; + } + sourceToken = new Token(); + expandNext = initAReset(0, sourceToken); + } + } + else { // reset to something already in rules + top = false; + } + } + // 7 After all this, set LAST to point to sourceToken, and goto + // step 3. + lastToken = sourceToken; + } + + if (m_resultLength_ > 0 + && m_listHeader_[m_resultLength_ - 1].m_first_ == null) { + m_resultLength_ --; + } + return m_resultLength_; + } + + /** + * Formats and throws a ParseException + * @param rules collation rule that failed + * @param offset failed offset in rules + * @throws ParseException with failure information + */ + private static final void throwParseException(String rules, int offset) + throws ParseException + { + // for pre-context + String precontext = rules.substring(0, offset); + String postcontext = rules.substring(offset, rules.length()); + StringBuffer error = new StringBuffer( + "Parse error occurred in rule at offset "); + error.append(offset); + error.append("\n after the prefix \""); + error.append(precontext); + error.append("\" before the suffix \""); + error.append(postcontext); + throw new ParseException(error.toString(), offset); + } + + /** + * Getting the next token + * @param startofrules flag indicating if we are at the start of rules + * @return the offset of the rules + * @exception ParseException thrown when rule parsing fails + */ + private int parseNextToken(boolean startofrules) throws 
ParseException + { + // parsing part + boolean variabletop = false; + boolean top = false; + boolean inchars = true; + boolean inquote = false; + boolean wasinquote = false; + byte before = 0; + boolean isescaped = false; + int newcharslen = 0, newextensionlen = 0; + int charsoffset = 0, extensionoffset = 0; + int newstrength = TOKEN_UNSET_; + + m_parsedToken_.m_prefixOffset_ = 0; + m_parsedToken_.m_prefixLen_ = 0; + m_parsedToken_.m_indirectIndex_ = 0; + + int limit = m_source_.length(); + while (m_current_ < limit) { + char ch = m_source_.charAt(m_current_); + if (inquote) { + if (ch == 0x0027) { // '\'' + inquote = false; + } + else { + if ((newcharslen == 0) || inchars) { + if (newcharslen == 0) { + charsoffset = m_extraCurrent_; + } + newcharslen ++; + } + else { + if (newextensionlen == 0) { + extensionoffset = m_extraCurrent_; + } + newextensionlen ++; + } + } + } + else if (isescaped) { + isescaped = false; + if (newstrength == TOKEN_UNSET_) { + throwParseException(m_rules_, m_current_); + } + if (ch != 0 && m_current_ != limit) { + if (inchars) { + if (newcharslen == 0) { + charsoffset = m_current_; + } + newcharslen ++; + } + else { + if (newextensionlen == 0) { + extensionoffset = m_current_; + } + newextensionlen ++; + } + } + } + else { + // Sets the strength for this entry + switch (ch) { + case 0x003D : // '=' + if (newstrength != TOKEN_UNSET_) { + return doEndParseNextToken(newstrength, newcharslen, + top, charsoffset, + extensionoffset, + newextensionlen, + variabletop, before); + } + // if we start with strength, we'll reset to top + if (startofrules == true) { + return doEndParseNextToken(TOKEN_RESET_, + newcharslen, + true, charsoffset, + extensionoffset, + newextensionlen, + variabletop, before); + } + newstrength = Collator.IDENTICAL; + break; + case 0x002C : // ',' + if (newstrength != TOKEN_UNSET_) { + return doEndParseNextToken(newstrength, newcharslen, + top, charsoffset, + extensionoffset, + newextensionlen, + variabletop, before); + } + 
// if we start with strength, we'll reset to top + if (startofrules == true) { + return doEndParseNextToken(TOKEN_RESET_, + newcharslen, + true, charsoffset, + extensionoffset, + newextensionlen, + variabletop, before); + } + newstrength = Collator.TERTIARY; + break; + case 0x003B : // ';' + if (newstrength != TOKEN_UNSET_) { + return doEndParseNextToken(newstrength, newcharslen, + true, charsoffset, + extensionoffset, + newextensionlen, + variabletop, before); + } + // if we start with strength, we'll reset to top + if (startofrules == true) { + return doEndParseNextToken(TOKEN_RESET_, + newcharslen, + true, charsoffset, + extensionoffset, + newextensionlen, + variabletop, before); + } + newstrength = Collator.SECONDARY; + break; + case 0x003C : // '<' + if (newstrength != TOKEN_UNSET_) { + return doEndParseNextToken(newstrength, newcharslen, + top, charsoffset, + extensionoffset, + newextensionlen, + variabletop, before); + } + // if we start with strength, we'll reset to top + if (startofrules == true) { + return doEndParseNextToken(TOKEN_RESET_, + newcharslen, true, + charsoffset, + extensionoffset, + newextensionlen, + variabletop, before); + } + // before this, do a scan to verify whether this is + // another strength + if (m_source_.charAt(m_current_ + 1) == 0x003C) { + m_current_ ++; + if (m_source_.charAt(m_current_ + 1) == 0x003C) { + m_current_ ++; // three in a row! 
+ newstrength = Collator.TERTIARY; + } + else { // two in a row + newstrength = Collator.SECONDARY; + } + } + else { // just one + newstrength = Collator.PRIMARY; + } + break; + case 0x0026 : // '&' + if (newstrength != TOKEN_UNSET_) { + return doEndParseNextToken(newstrength, newcharslen, + top, charsoffset, + extensionoffset, + newextensionlen, + variabletop, before); + } + newstrength = TOKEN_RESET_; // PatternEntry::RESET = 0 + break; + case 0x005b : // '[' + // options - read an option, analyze it + int optionend = m_rules_.indexOf(0x005d, m_current_); + if (optionend != -1) { // ']' + byte result = readAndSetOption(optionend); + m_current_ = optionend; + if ((result & TOKEN_TOP_MASK_) != 0) { + if (newstrength == TOKEN_RESET_) { + charsoffset = m_extraCurrent_; + m_source_.append((char)0xFFFE); + IndirectBoundaries ib = + INDIRECT_BOUNDARIES_[ + m_parsedToken_.m_indirectIndex_]; + m_source_.append((char)(ib.m_startCE_ + >> 16)); + m_source_.append((char)(ib.m_startCE_ + & 0xFFFF)); + m_extraCurrent_ += 3; + m_current_ ++; + return doEndParseNextToken(newstrength, + 3, true, charsoffset, + extensionoffset, + newextensionlen, + variabletop, before); + } + else { + throwParseException(m_rules_, m_current_); + } + } + else if ((result & TOKEN_VARIABLE_TOP_MASK_) != 0) { + if (newstrength != TOKEN_RESET_ + && newstrength != TOKEN_UNSET_) { + charsoffset = m_extraCurrent_; + m_source_.append((char)0xFFFF); + m_extraCurrent_ ++; + m_current_ ++; + return doEndParseNextToken(newstrength, + 1, true, charsoffset, + extensionoffset, + newextensionlen, + variabletop, before); + } + else { + throwParseException(m_rules_, m_current_); + } + } + else if ((result & TOKEN_BEFORE_) != 0){ + if (newstrength == TOKEN_RESET_) { + before = (byte)(result & TOKEN_BEFORE_); + } + else { + throwParseException(m_rules_, m_current_); + } + } + } + break; + // Ignore the white spaces + case 0x0009 : // '\t' + case 0x000C : // '\f' + case 0x000D : // '\r' + case 0x000A : // '\n' + case 
0x0020 : // ' ' + break; // skip whitespace TODO use Unicode + case 0x002F : // '/' + wasinquote = false; // if we were copying source + // characters, we want to stop now + inchars = false; // we're now processing expansion + break; + case 0x005C : // back slash for escaped chars + isescaped = true; + break; + // found a quote, we're gonna start copying + case 0x0027 : //'\'' + if (newstrength == TOKEN_UNSET_) { + // quote is illegal until we have a strength + throwParseException(m_rules_, m_current_); + } + inquote = true; + if (inchars) { // we're doing characters + if (wasinquote == false) { + charsoffset = m_extraCurrent_; + } + if (newcharslen != 0) { + m_source_.append(m_source_.substring( + m_current_ - newcharslen, + m_current_)); + m_extraCurrent_ += newcharslen; + } + newcharslen ++; + } + else { // we're doing an expansion + if (wasinquote == false) { + extensionoffset = m_extraCurrent_; + } + if (newextensionlen != 0) { + m_source_.append(m_source_.substring( + m_current_ - newextensionlen, + m_current_)); + m_extraCurrent_ += newextensionlen; + } + newextensionlen ++; + } + wasinquote = true; + m_current_ ++; + ch = m_source_.charAt(m_current_); + if (ch == 0x0027) { // copy the double quote + m_source_.append(ch); + m_extraCurrent_ ++; + inquote = false; + } + break; + // '@' is french only if the strength is not currently set + // if it is, it's just a regular character in collation + case 0x0040 : // '@' + if (newstrength == TOKEN_UNSET_) { + m_options_.m_isFrenchCollation_ = true; + break; + } + case 0x007C : //| + // this means we have actually been reading prefix part + // we want to store read characters to the prefix part + // and continue reading the characters (proper way + // would be to restart reading the chars, but in that + // case we would have to complicate the token hasher, + // which I do not intend to play with. Instead, we will + // do prefixes when prefixes are due (before adding the + // elements). 
+ m_parsedToken_.m_prefixOffset_ = charsoffset; + m_parsedToken_.m_prefixLen_ = newcharslen; + if (inchars) { // we're doing characters + if (wasinquote == false) { + charsoffset = m_extraCurrent_; + } + if (newcharslen != 0) { + m_source_.append(m_source_.substring(m_current_, + newcharslen)); + m_extraCurrent_ += newcharslen; + } + newcharslen ++; + } + wasinquote = true; + m_current_ ++; + ch = m_source_.charAt(m_current_); + break; + default : + if (newstrength == TOKEN_UNSET_) { + throwParseException(m_rules_, m_current_); + } + if (isSpecialChar(ch) && (inquote == false)) { + throwParseException(m_rules_, m_current_); + } + if (ch == 0x0000 && m_current_ + 1 == limit) { + break; + } + if (inchars) { + if (newcharslen == 0) { + charsoffset = m_current_; + } + newcharslen++; + } + else { + if (newextensionlen == 0) { + extensionoffset = m_current_; + } + newextensionlen ++; + } + break; + } + } + if (wasinquote) { + if (ch != 0x27) { + m_source_.append(ch); + m_extraCurrent_ ++; + } + } + m_current_ ++; + } + return doEndParseNextToken(newstrength, newcharslen, top, charsoffset, + extensionoffset, newextensionlen, + variabletop, before); + } + + /** + * End the next parse token + * @param newstrength new strength + * @return offset in rules, -1 for end of rules + */ + private int doEndParseNextToken(int newstrength, int newcharslen, + boolean top, int charsoffset, + int extensionoffset, int newextensionlen, + boolean variabletop, int before) + throws ParseException + { + boolean wasinquote = false; + if (newstrength == TOKEN_UNSET_) { + return -1; + } + if (newcharslen == 0 && top == false) { + throwParseException(m_rules_, m_current_); + return -1; + } + + m_parsedToken_.m_strength_ = newstrength; + m_parsedToken_.m_charsOffset_ = charsoffset; + m_parsedToken_.m_charsLen_ = newcharslen; + m_parsedToken_.m_extensionOffset_ = extensionoffset; + m_parsedToken_.m_extensionLen_ = newextensionlen; + m_parsedToken_.m_flags_ = (char)((TOKEN_VARIABLE_TOP_MASK_ * + 
(variabletop ? 1 : 0)) + | (TOKEN_TOP_MASK_ * (top ? 1 : 0)) | before); + return m_current_; + } + + /** + * Token before this element + * @param sourcetoken + * @param strength collation strength + * @return the token before source token + * @exception ParseException thrown when rules have the wrong syntax + */ + private Token getVirginBefore(Token sourcetoken, int strength) + throws ParseException + { + // this is a virgin before - we need to fish the anchor from the UCA + StringBuffer str = new StringBuffer(); + if (sourcetoken != null) { + str.append(m_source_.charAt(sourcetoken.m_source_ & 0xFFFFFF)); + } + else { + str.append(m_source_.charAt(m_parsedToken_.m_charsOffset_)); + } + CollationElementIterator coleiter = + RuleBasedCollator.UCA_.getCollationElementIterator( + str.toString()); + int basece = coleiter.next() & 0xFFFFFF3F; + int basecontce = coleiter.next(); + if (basecontce == CollationElementIterator.NULLORDER) { + basecontce = 0; + } + int ce[] = new int[3]; // invpos, first ce and second ce + CollationParsedRuleBuilder.INVERSE_UCA_.getInversePrevCE(basece, + basecontce, strength, ce); + int invpos = ce[0]; + int ch = CollationParsedRuleBuilder.INVERSE_UCA_.m_table_[3 * invpos + + 2]; + if ((ch & INVERSE_SIZE_MASK_) != 0) { + int offset = ch & INVERSE_OFFSET_MASK_; + ch = CollationParsedRuleBuilder.INVERSE_UCA_.m_continuations_[ + offset]; + } + m_source_.append((char)ch); + m_extraCurrent_ ++; + m_parsedToken_.m_charsOffset_ = m_extraCurrent_ - 1; + m_parsedToken_.m_charsLen_ = 1; + + // We got an UCA before. However, this might have been tailored. 
+ // example: + // &\u30ca = \u306a + // &[before 3]\u306a<<<\u306a|\u309d + + Token key = new Token(); + key.m_source_ = (m_parsedToken_.m_charsLen_ << 24) + | m_parsedToken_.m_charsOffset_; + key.m_rules_ = m_rules_; + sourcetoken = (Token)m_hashTable_.get(key); + + // if we found a tailored thing, we have to use the UCA value and + // construct a new reset token with constructed name + if (sourcetoken != null && sourcetoken.m_strength_ != TOKEN_RESET_) { + // character to which we want to anchor is already tailored. + // We need to construct a new token which will be the anchor point + m_source_.replace(m_extraCurrent_ - 1, m_extraCurrent_, "\uFFFE"); + m_source_.append(ch); + m_extraCurrent_ ++; + m_parsedToken_.m_charsLen_ ++; + m_listHeader_[m_resultLength_].m_baseCE_ = ce[0] & 0xFFFFFF3F; + if (RuleBasedCollator.isContinuation(ce[1])) { + m_listHeader_[m_resultLength_].m_baseContCE_ = ce[1]; + } + else { + m_listHeader_[m_resultLength_].m_baseContCE_ = 0; + } + m_listHeader_[m_resultLength_].m_nextCE_ = 0; + m_listHeader_[m_resultLength_].m_nextContCE_ = 0; + m_listHeader_[m_resultLength_].m_previousCE_ = 0; + m_listHeader_[m_resultLength_].m_previousContCE_ = 0; + m_listHeader_[m_resultLength_].m_indirect_ = false; + sourcetoken = new Token(); + initAReset(-1, sourcetoken); + } + return sourcetoken; + } + + /** + * Processing Description. + * 1. Build a m_listHeader_. Each list has a header, which contains two lists + * (positive and negative), a reset token, a baseCE, nextCE, and + * previousCE. The lists and reset may be null. + * 2. As you process, you keep a LAST pointer that points to the last token + * you handled. 
+ * @param expand string offset, -1 for null strings + * @param targetToken tken to update + * @return expandnext offset + * @throws ParseException thrown when rules syntax failed + */ + private int initAReset(int expand, Token targetToken) throws ParseException + { + // do the reset thing + targetToken.m_rules_ = m_rules_; + targetToken.m_source_ = m_parsedToken_.m_charsLen_ << 24 + | m_parsedToken_.m_charsOffset_; + targetToken.m_expansion_ = m_parsedToken_.m_extensionLen_ << 24 + | m_parsedToken_.m_extensionOffset_; + if (m_parsedToken_.m_prefixOffset_ != 0) { + throwParseException(m_rules_, m_parsedToken_.m_charsOffset_ - 1); + } + + targetToken.m_prefix_ = 0; + // TODO: this should also handle reverse + targetToken.m_polarity_ = TOKEN_POLARITY_POSITIVE_; + targetToken.m_strength_ = TOKEN_RESET_; + targetToken.m_next_ = null; + targetToken.m_previous_ = null; + targetToken.m_CELength_ = 0; + targetToken.m_expCELength_ = 0; + targetToken.m_listHeader_ = m_listHeader_[m_resultLength_]; + m_listHeader_[m_resultLength_].m_first_ = null; + m_listHeader_[m_resultLength_].m_last_ = null; + m_listHeader_[m_resultLength_].m_first_ = null; + m_listHeader_[m_resultLength_].m_last_ = null; + m_listHeader_[m_resultLength_].m_reset_ = targetToken; + + /* 3 Consider each item: relation, source, and expansion: + * e.g. ...< x / y ... + * First convert all expansions into normal form. Examples: + * If "xy" doesn't occur earlier in the list or in the UCA, convert + * &xy * c * d * ... into &x * c/y * d * ... + * Note: reset values can never have expansions, although they can + * cause the very next item to have one. They may be contractions, if + * they are found earlier in the list. 
+ */ + int result = 0; + if (expand != -1) { + // check to see if there is an expansion + if (m_parsedToken_.m_charsLen_ > 1) { + targetToken.m_source_ = ((expand + - m_parsedToken_.m_charsOffset_ ) + << 24) + | m_parsedToken_.m_charsOffset_; + result = ((m_parsedToken_.m_charsLen_ + + m_parsedToken_.m_charsOffset_ - expand) << 24) + | expand; + } + } + + m_resultLength_ ++; + m_hashTable_.put(targetToken, targetToken); + return result; + } + + /** + * Checks if an character is special + * @param ch character to test + * @return true if the character is special + */ + private static final boolean isSpecialChar(char ch) + { + return (ch <= 0x002F && ch >= 0x0020) || (ch <= 0x003F && ch >= 0x003A) + || (ch <= 0x0060 && ch >= 0x005B) + || (ch <= 0x007E && ch >= 0x007D) || ch == 0x007B; + } + + /** + * Reads and set collation options + * @param optionend offset to the end of the option in rules + * @return TOKEN_SUCCESS if option is set correct, 0 otherwise + * @exception ParseException thrown when options in rules are wrong + */ + private byte readAndSetOption(int optionend) throws ParseException + { + int start = m_current_ + 1; // skip opening '[' + int i = 0; + boolean foundoption = false; + int optionarg = 0; + while (i < RULES_OPTIONS_.length) { + String option = RULES_OPTIONS_[i].m_name_; + int optionlength = option.length(); + if (m_rules_.length() < start + optionlength) { + throwParseException(m_rules_, start); + } + if (option.equalsIgnoreCase(m_rules_.substring(start, start + + optionlength))) { + foundoption = true; + if (optionend - start > optionlength) { + optionarg = start + optionlength + 1; + // start of the options, skip space + while (UCharacter.isWhitespace(m_rules_.charAt(optionarg))) + { // eat whitespace + optionarg ++; + } + } + break; + } + i ++; + } + + if (!foundoption) { + throwParseException(m_rules_, start); + } + + if (i < 7) { + if (optionarg != 0) { + for (int j = 0; j < RULES_OPTIONS_[i].m_subOptions_.length; + j ++) { + String 
subname = RULES_OPTIONS_[i].m_subOptions_[j]; + int size = optionarg + subname.length(); + if (m_rules_.length() > size && subname.equalsIgnoreCase( + m_rules_.substring(optionarg, + subname.length()))) { + setOptions(m_options_, RULES_OPTIONS_[i].m_attribute_, + RULES_OPTIONS_[i].m_subOptionAttributeValues_[j]); + return TOKEN_SUCCESS_MASK_; + } + } + } + throwParseException(m_rules_, optionarg); + } + else if (i == 7) { // variable top + return TOKEN_SUCCESS_MASK_ | TOKEN_VARIABLE_TOP_MASK_; + } + else if (i == 8) { // rearange + return TOKEN_SUCCESS_MASK_; + } + else if (i == 9) { // before + if (optionarg != 0) { + for (int j = 0; j < RULES_OPTIONS_[i].m_subOptions_.length; + j ++) { + String subname = RULES_OPTIONS_[i].m_subOptions_[j]; + int size = optionarg + subname.length(); + if (m_rules_.length() > size && subname.equalsIgnoreCase( + m_rules_.substring(optionarg, + subname.length()))) { + return (byte)(TOKEN_SUCCESS_MASK_ + | RULES_OPTIONS_[i].m_subOptionAttributeValues_[j] + + 1); + } + } + } + throwParseException(m_rules_, optionarg); + } + else if (i == 10) { // top, we are going to have an array with + // structures of limit CEs index to this array will be + // src->parsedToken.indirectIndex + m_parsedToken_.m_indirectIndex_ = 0; + return TOKEN_SUCCESS_MASK_ | TOKEN_TOP_MASK_; + } + else if (i < 13) { // first, last + for (int j = 0; j < RULES_OPTIONS_[i].m_subOptions_.length; j ++) { + String subname = RULES_OPTIONS_[i].m_subOptions_[j]; + int size = optionarg + subname.length(); + if (m_rules_.length() > size && subname.equalsIgnoreCase( + m_rules_.substring(optionarg, + subname.length()))) { + m_parsedToken_.m_indirectIndex_ = (char)(i - 10 + j << 1); + return TOKEN_SUCCESS_MASK_ | TOKEN_TOP_MASK_; + } + } + throwParseException(m_rules_, optionarg); + } + else { + throwParseException(m_rules_, optionarg); + } + return TOKEN_SUCCESS_MASK_; // we will never reach here. 
+ } + + /** + * Set collation option + * @param optionset option set to set + * @param attribute type to set + * @param value attribute value + */ + private void setOptions(OptionSet optionset, int attribute, int value) + { + switch (attribute) { + case RuleBasedCollator.Attribute.HIRAGANA_QUATERNARY_MODE_ : + optionset.m_isHiragana4_ + = (value == RuleBasedCollator.AttributeValue.ON_); + break; + case RuleBasedCollator.Attribute.FRENCH_COLLATION_ : + optionset.m_isFrenchCollation_ + = (value == RuleBasedCollator.AttributeValue.ON_); + break; + case RuleBasedCollator.Attribute.ALTERNATE_HANDLING_ : + optionset.m_isAlternateHandlingShifted_ + = (value + == RuleBasedCollator.AttributeValue.SHIFTED_); + break; + case RuleBasedCollator.Attribute.CASE_FIRST_ : + optionset.m_caseFirst_ = value; + break; + case RuleBasedCollator.Attribute.CASE_LEVEL_ : + optionset.m_isCaseLevel_ + = (value == RuleBasedCollator.AttributeValue.ON_); + break; + case RuleBasedCollator.Attribute.NORMALIZATION_MODE_ : + if (value == RuleBasedCollator.AttributeValue.ON_) { + value = Collator.CANONICAL_DECOMPOSITION; + } + optionset.m_decomposition_ = value; + break; + case RuleBasedCollator.Attribute.STRENGTH_ : + optionset.m_strength_ = value; + break; + default : + break; + } + } +} diff --git a/icu4j/src/com/ibm/icu/text/Collator.java b/icu4j/src/com/ibm/icu/text/Collator.java index d49af43ef18..ea8f3b6a736 100755 --- a/icu4j/src/com/ibm/icu/text/Collator.java +++ b/icu4j/src/com/ibm/icu/text/Collator.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Collator.java,v $ -* $Date: 2002/05/20 23:43:01 $ -* $Revision: 1.6 $ +* $Date: 2002/06/21 23:56:44 $ +* $Revision: 1.7 $ * ******************************************************************************* */ @@ -15,57 +15,103 @@ package com.ibm.icu.text; import java.util.Locale; /** -*The Collator class performs locale-sensitive String 
comparison. -* You use this class to build searching and sorting routines for natural -* language text.
-*Collator is an abstract base class. Subclasses implement specific -* collation strategies. One subclass, RuleBasedCollator, is currently -* provided and is applicable to a wide set of languages. Other subclasses -* may be created to handle more specialized needs.
-*Like other locale-sensitive classes, you can use the static factory -* method, getInstance, to obtain the appropriate Collator object for a given -* locale. You will only need to look at the subclasses of Collator if you need -* to understand the details of a particular collation strategy or if you need -* to modify that strategy.
-*The following example shows how to compare two strings using the Collator -* for the default locale. +*
+* Collator is an abstract base class, its subclasses performs +* locale-sensitive String comparison. A concrete subclass, RuleBasedCollator, +* is provided and it allows customization of the collation ordering by the use +* of rule sets. +*
+*+* Following the +* Unicode Consortium's specifications for +* the +* Unicode Collation Algorithm (UCA), there are +* 5 different levels of strength used in comparisons. +*
+* For more information about the collation service see the +* users +* guide. +*
+*+* Examples of use *
-* // Compare two strings in the default locale -* Collator myCollator = Collator.getInstance(); -* if (myCollator.compare("abc", "ABC") < 0) { -* System.out.println("abc is less than ABC"); -* } -* else { -* System.out.println("abc is greater than or equal to ABC"); -* } -*-*
You can set a Collator
's strength property to
-* determine the level of difference considered significant in comparisons.
-* Four strengths are provided: PRIMARY
, SECONDARY
,
-* TERTIARY
, and IDENTICAL
. The exact assignment of
-* strengths to language features is locale dependant. For example, in Czech,
-* "e" and "f" are considered primary differences, while "e" and "\u00EA" are
-* secondary differences, "e" and "E" are tertiary differences and "e" and "e"
-* are identical. The following shows how both case and accents could be
-* ignored for US English.
-* //Get the Collator for US English and set its strength to PRIMARY +* // Get the Collator for US English and set its strength to PRIMARY * Collator usCollator = Collator.getInstance(Locale.US); * usCollator.setStrength(Collator.PRIMARY); * if (usCollator.compare("abc", "ABC") == 0) { * System.out.println("Strings are equivalent"); * } +* +* The following example shows how to compare two strings using the Collator +* for the default locale. +* // Compare two strings in the default locale +* Collator myCollator = Collator.getInstance(); +* myCollator.setDecomposition(NO_DECOMPOSITION); +* if (myCollator.compare("à\u0325", "a\u0325̀") != 0) { +* System.out.println("à\u0325 is not equals to a\u0325̀ without decomposition"); +* myCollator.setDecomposition(CANONICAL_DECOMPOSITION); +* if (myCollator.compare("à\u0325", "a\u0325̀") != 0) { +* System.out.println("Error: à\u0325 should be equals to a\u0325̀ with decomposition"); +* } +* else { +* System.out.println("à\u0325 is equals to a\u0325̀ with decomposition"); +* } +* } +* else { +* System.out.println("Error: à\u0325 should be not equals to a\u0325̀ without decomposition"); +* } *-*
For comparing Strings exactly once, the compare method provides the best -* performance. When sorting a list of Strings however, it is generally -* necessary to compare each String multiple times. In this case, -* CollationKeys provide better performance. The CollationKey class converts a -* String to a series of bits that can be compared bitwise against other -* CollationKeys. A CollationKey is created by a Collator object for a given -* String.
-*Note: CollationKeys from different Collators can not be compared. See the -* class description for CollationKey for an example using CollationKeys. *
+* @see RuleBasedCollator +* @see CollationKey * @author Syn Wee Quek * @since release 2.2, April 18 2002 * @draft 2.2 @@ -76,92 +122,92 @@ public abstract class Collator // public data members --------------------------------------------------- /** - * Collator strength value. When set, only PRIMARY differences are - * considered significant during comparison. The assignment of strengths - * to language features is locale dependant. A common example is for - * different base letters ("a" vs "b") to be considered a PRIMARY - * difference. + * Strongest collator strength value. Typically, used to denote differences + * between base characters. + * See class documentation for more explanation. * @see #setStrength * @see #getStrength * @draft 2.2 */ - public final static int PRIMARY - = RuleBasedCollator.AttributeValue.PRIMARY_; + public final static int PRIMARY = 0; /** - * Collator strength value. When set, only SECONDARY and above - * differences are considered significant during comparison. The - * assignment of strengths to language features is locale dependant. A - * common example is for different accented forms of the same base letter - * ("a" vs "\u00E4") to be considered a SECONDARY difference. + * Second level collator strength value. + * Accents in the characters are considered secondary differences. + * Other differences between letters can also be considered secondary + * differences, depending on the language. + * See class documentation for more explanation. * @see #setStrength * @see #getStrength * @draft 2.2 */ - public final static int SECONDARY - = RuleBasedCollator.AttributeValue.SECONDARY_; + public final static int SECONDARY = 1; /** - * Collator strength value. When set, only TERTIARY and above differences - * are considered significant during comparison. The assignment of - * strengths to language features is locale dependant. A common example is - * for case differences ("a" vs "A") to be considered a TERTIARY - * difference. 
+ * Third level collator strength value. + * Upper and lower case differences in characters are distinguished at this + * strength level. In addition, a variant of a letter differs from the base + * form on the tertiary level. + * See class documentation for more explanation. * @see #setStrength * @see #getStrength * @draft 2.2 */ - public final static int TERTIARY - = RuleBasedCollator.AttributeValue.TERTIARY_; - + public final static int TERTIARY = 2; /** - * Collator strength value. When set, only QUARTENARY and above differences - * are considered significant during comparison. The assignment of - * strengths to language features is locale dependant. - * difference. + * Fourth level collator strength value. + * When punctuation is ignored + * + * (see Ignoring Punctuations in the user guide) at PRIMARY to TERTIARY + * strength, an additional strength level can + * be used to distinguish words with and without punctuation + * See class documentation for more explanation. * @see #setStrength * @see #getStrength * @draft 2.2 */ - public final static int QUATERNARY - = RuleBasedCollator.AttributeValue.QUATERNARY_; - + public final static int QUATERNARY = 3; /** - *Collator strength value. When set, all differences are considered - * significant during comparison. The assignment of strengths to language - * features is locale dependant. A common example is for control - * characters ("\u0001" vs "\u0002") to be considered equal at - * the PRIMARY, SECONDARY, and TERTIARY levels but different at the - * IDENTICAL level. Additionally, differences between pre-composed - * accents such as "\u00C0" (A-grave) and combining accents such as - * "A\u0300" (A, combining-grave) will be considered significant at - * the tertiary level if decomposition is set to NO_DECOMPOSITION. + *
+ * Smallest Collator strength value. When all other strengths are equal, + * the IDENTICAL strength is used as a tiebreaker. The Unicode code point + * values of the NFD form of each string are compared, just in case there + * is no difference. + * See class documentation for more explanation. + *
+ *+ * Note this value is different from JDK's *
- *Note this value is different from JDK's
* @draft 2.2 */ - public final static int IDENTICAL - = RuleBasedCollator.AttributeValue.IDENTICAL_; + public final static int IDENTICAL = 15; /** - *Decomposition mode value. With NO_DECOMPOSITION set, accented - * characters will not be decomposed for collation. This is the default - * setting and provides the fastest collation but will only produce - * correct results for languages that do not use accents.
- *Note this value is different from JDK's
+ *+ * Decomposition mode value. With NO_DECOMPOSITION set, Strings will not be + * decomposed for collation. This is the default + * decomposition setting unless otherwise specified by the locale used + * to create the Collator. + *
+ *+ * Note this value is different from JDK's + *
+ * @see #CANONICAL_DECOMPOSITION * @see #getDecomposition * @see #setDecomposition * @draft 2.2 */ - public final static int NO_DECOMPOSITION - = RuleBasedCollator.AttributeValue.OFF_; - + public final static int NO_DECOMPOSITION = 16; /** - *Decomposition mode value. With CANONICAL_DECOMPOSITION set, + *
+ * Decomposition mode value. With CANONICAL_DECOMPOSITION set, * characters that are canonical variants according to Unicode 2.0 will be - * decomposed for collation. This should be used to get correct collation - * of accented characters.
- *CANONICAL_DECOMPOSITION corresponds to Normalization Form D as + * decomposed for collation. + *
+ *+ * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as * described in - * Unicode Technical Report #15.
+ * Unicode Technical Report #15. + * + * @see #NO_DECOMPOSITION * @see #getDecomposition * @see #setDecomposition * @draft 2.2 @@ -173,9 +219,15 @@ public abstract class Collator // public setters -------------------------------------------------------- /** - *Sets this Collator's strength property. The strength property + *
+ * Sets this Collator's strength property. The strength property * determines the minimum level of difference considered significant - * during comparison.
+ * during comparison. + * + *+ * The default strength for the Collator is TERTIARY, unless specified + * otherwise by the locale used to create the Collator. + *
*See the Collator class description for an example of use.
* @param the new strength value. * @see #getStrength @@ -185,10 +237,11 @@ public abstract class Collator * @see #QUATERNARY * @see #IDENTICAL * @exception IllegalArgumentException If the new strength value is not one - * of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL. + * of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL. * @draft 2.2 */ - public void setStrength(int newStrength) { + public void setStrength(int newStrength) + { if ((newStrength != PRIMARY) && (newStrength != SECONDARY) && (newStrength != TERTIARY) && @@ -200,18 +253,38 @@ public abstract class Collator } /** - * Set the decomposition mode of this Collator. See getDecomposition - * for a description of decomposition mode. + *+ * Set the decomposition mode of this Collator. + * Setting this decomposition property with CANONICAL_DECOMPOSITION allows + * the Collator to handle + * un-normalized text properly, producing the same results as if the text + * were normalized. If NO_DECOMPOSITION is set, it is the user's + * responsibility to insure that all text is already in the appropriate + * form before a comparison or before getting a CollationKey. Adjusting + * decomposition mode allows the user to select between faster and more + * complete collation behavior. + *
+ *+ * Since a great majority of the world languages does not require text + * normalization, most locales has NO_DECOMPOSITION has the default + * decomposition mode. + *
+ * The default decompositon mode for the Collator is NO_DECOMPOSITON, + * unless specified otherwise by the locale used to create the Collator. + *
+ *+ * See getDecomposition for a description of decomposition mode. + *
* @param decomposition the new decomposition mode * @see #getDecomposition * @see #NO_DECOMPOSITION * @see #CANONICAL_DECOMPOSITION - * @see #FULL_DECOMPOSITION - * @exception IllegalArgumentException If the given value is not a valid decomposition - * mode. + * @exception IllegalArgumentException If the given value is not a valid + * decomposition mode. * @draft 2.2 */ - public void setDecomposition(int decomposition) { + public void setDecomposition(int decomposition) + { if ((decomposition != NO_DECOMPOSITION) && (decomposition != CANONICAL_DECOMPOSITION)) { throw new IllegalArgumentException("Wrong decomposition mode."); @@ -225,9 +298,11 @@ public abstract class Collator * Gets the Collator for the current default locale. * The default locale is determined by java.util.Locale.getDefault(). * @return the Collator for the default locale (for example, en_US) if it - * is created successfully, otherwise if there is a failure, - * null will be returned. + * is created successfully. Otherwise if there is no Collator + * associated with the current locale, the default UCA collator + * will be returned. * @see java.util.Locale#getDefault + * @see #getInstance(Locale) * @draft 2.2 */ public static final Collator getInstance() @@ -238,11 +313,13 @@ public abstract class Collator /** * Gets the Collator for the desired locale. * @param locale the desired locale. - * @return Collator for the desired locale if it is created successfully, - * otherwise if there is a failure, the default UCA collator will - * be returned. + * @return Collator for the desired locale if it is created successfully. + * Otherwise if there is no Collator + * associated with the current locale, the default UCA collator + * will be returned. * @see java.util.Locale * @see java.util.ResourceBundle + * @see #getInstance() * @draft 2.2 */ public static final Collator getInstance(Locale locale) @@ -256,15 +333,19 @@ public abstract class Collator } /** - *Returns this Collator's strength property. 
The strength property - * determines the minimum level of difference considered significant - * during comparison.
- *See the Collator class description for an example of use.
+ *+ * Returns this Collator's strength property. The strength property + * determines the minimum level of difference considered significant. + *
+ *+ * See the Collator class description for more details. + *
* @return this Collator's current strength property. * @see #setStrength * @see #PRIMARY * @see #SECONDARY * @see #TERTIARY + * @see #QUATERNARY * @see #IDENTICAL * @draft 2.2 */ @@ -274,24 +355,17 @@ public abstract class Collator } /** - *Get the decomposition mode of this Collator. Decomposition mode - * determines how Unicode composed characters are handled. Adjusting - * decomposition mode allows the user to select between faster and more - * complete collation behavior. - *
The three values for decomposition mode are: - *
+ * Get the decomposition mode of this Collator. Decomposition mode + * determines how Unicode composed characters are handled. + *
+ *+ * See the Collator class description for more details. *
* @return the decomposition mode * @see #setDecomposition * @see #NO_DECOMPOSITION * @see #CANONICAL_DECOMPOSITION - * @see #FULL_DECOMPOSITION * @draft 2.2 */ public int getDecomposition() @@ -302,91 +376,68 @@ public abstract class Collator // public other methods ------------------------------------------------- /** - * Convenience method for comparing the equality of two strings based on - * this Collator's collation rules. + * Convenience method for comparing the equality of two text Strings based + * on this Collator's collation rules, strength and decomposition mode. * @param source the source string to be compared with. * @param target the target string to be compared with. * @return true if the strings are equal according to the collation * rules. false, otherwise. * @see #compare + * @exception NullPointerException thrown if either arguments is null. * @draft 2.2 */ - public boolean equals(String source, String target) + public boolean equals(String source, String target) { return (compare(source, target) == 0); } - - /** - * Cloning this Collator. - * @return a cloned Collator of this object - * @draft 2.2 - */ - public Object clone() - { - try { - return (Collator)super.clone(); - } catch (CloneNotSupportedException e) { - throw new InternalError(); - } - } /** * Compares the equality of two Collators. * @param that the Collator to be compared with this. * @return true if this Collator is the same as that Collator; - * false otherwise. + * false otherwise. * @draft 2.2 */ - public boolean equals(Object that) - { - if (this == that) { - return true; - } - if (that == null || getClass() != that.getClass()) { - return false; - } - Collator other = (Collator) that; - return ((m_strength_ == other.m_strength_) && - (m_decomposition_ == other.m_decomposition_)); - } + public abstract boolean equals(Object that); // public abstract methods ----------------------------------------------- /** - * Generates the hash code for this Collator. 
+ * Generates a unique hash code for this Collator. * @draft 2.2 + * @return 32 bit unique hash code */ public abstract int hashCode(); /** - *Compares the source string to the target string according to the - * collation rules for this Collator. Returns an integer less than, equal - * to or greater than zero depending on whether the source String is less - * than, equal to or greater than the target string. See the Collator - * class description for an example of use.
- *For a one time comparison, this method has the best performance. If - * a given String will be involved in multiple comparisons, - * CollationKey.compareTo() has the best performance. See the Collator - * class description for an example using CollationKeys.
- * @param source the source string. - * @param target the target string. + *+ * Compares the source text String to the target text String according to + * the collation rules, strength and decomposition mode for this Collator. + * Returns an integer less than, + * equal to or greater than zero depending on whether the source String is + * less than, equal to or greater than the target String. See the Collator + * class description for an example of use. + *
+ * @param source the source String. + * @param target the target String. * @return Returns an integer value. Value is less than zero if source is * less than target, value is zero if source and target are equal, * value is greater than zero if source is greater than target. * @see CollationKey * @see #getCollationKey + * @exception NullPointerException thrown if either arguments is null. * @draft 2.2 */ public abstract int compare(String source, String target); /** - *Transforms the String into a series of bits that can be compared - * bitwise to other CollationKeys. CollationKeys provide better - * performance than Collator.compare() when Strings are involved in - * multiple comparisons.
- *See the Collator class description for an example using - * CollationKeys.
- * @param source the string to be transformed into a collation key. + *+ * Transforms the String into a series of bits that can be compared + * bitwise to other CollationKeys. Bits generated depends on the collation + * rules, strength and decomposition mode. + *
+ *See the CollationKey class documentation for more information.
+ * @param source the string to be transformed into a CollationKey. * @return the CollationKey for the given String based on this Collator's * collation rules. If the source String is null, a null * CollationKey is returned. @@ -396,35 +447,18 @@ public abstract class Collator */ public abstract CollationKey getCollationKey(String source); - // protected data members ------------------------------------------------ + // protected constructor ------------------------------------------------- + + + // private data members -------------------------------------------------- /** * Collation strength */ - protected int m_strength_; + private int m_strength_ = TERTIARY; /** * Decomposition mode */ - protected int m_decomposition_; - - // protected constructor ------------------------------------------------- - - /** - *Protected constructor for use by subclasses. - * Public access to creating Collators is handled by the API getInstance(). - *
- * @draft 2.2 - */ - protected Collator() throws Exception - { - m_strength_ = TERTIARY; - m_decomposition_ = CANONICAL_DECOMPOSITION; - } - - // protected methods ----------------------------------------------------- - - // private variables ----------------------------------------------------- - - // private methods ------------------------------------------------------- + private int m_decomposition_ = CANONICAL_DECOMPOSITION; } diff --git a/icu4j/src/com/ibm/icu/text/CollatorReader.java b/icu4j/src/com/ibm/icu/text/CollatorReader.java index 7b6f4b0f2a9..bdf8f8cd325 100644 --- a/icu4j/src/com/ibm/icu/text/CollatorReader.java +++ b/icu4j/src/com/ibm/icu/text/CollatorReader.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollatorReader.java,v $ -* $Date: 2002/05/16 20:04:49 $ -* $Revision: 1.2 $ +* $Date: 2002/06/21 23:56:47 $ +* $Revision: 1.3 $ * ******************************************************************************* */ @@ -140,26 +140,28 @@ final class CollatorReader * @exception IOException thrown when there's a data error. 
* @draft 2.2 */ - public void readOptions(RuleBasedCollator rbc) throws IOException + protected void readOptions(RuleBasedCollator rbc) throws IOException { rbc.m_variableTopValue_ = m_dataInputStream_.readInt(); - rbc.setAttributeDefault(RuleBasedCollator.Attribute.FRENCH_COLLATION_, - m_dataInputStream_.readInt()); - rbc.setAttributeDefault( - RuleBasedCollator.Attribute.ALTERNATE_HANDLING_, - m_dataInputStream_.readInt()); - rbc.setAttributeDefault(RuleBasedCollator.Attribute.CASE_FIRST_, - m_dataInputStream_.readInt()); - rbc.setAttributeDefault(RuleBasedCollator.Attribute.CASE_LEVEL_, - m_dataInputStream_.readInt()); - rbc.setAttributeDefault( - RuleBasedCollator.Attribute.NORMALIZATION_MODE_, - m_dataInputStream_.readInt()); - rbc.setAttributeDefault(RuleBasedCollator.Attribute.STRENGTH_, - m_dataInputStream_.readInt()); - rbc.setAttributeDefault( - RuleBasedCollator.Attribute.HIRAGANA_QUATERNARY_MODE_, - m_dataInputStream_.readInt()); + rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt() + == RuleBasedCollator.AttributeValue.ON_); + rbc.m_defaultIsAlternateHandlingShifted_ + = (m_dataInputStream_.readInt() == + RuleBasedCollator.AttributeValue.SHIFTED_); + rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt(); + rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt() + == RuleBasedCollator.AttributeValue.ON_); + int value = m_dataInputStream_.readInt(); + if (value == RuleBasedCollator.AttributeValue.ON_) { + value = Collator.CANONICAL_DECOMPOSITION; + } + else { + value = Collator.NO_DECOMPOSITION; + } + rbc.m_defaultDecomposition_ = value; + rbc.m_defaultStrength_ = m_dataInputStream_.readInt(); + rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt() + == RuleBasedCollator.AttributeValue.ON_); } /** @@ -169,7 +171,7 @@ final class CollatorReader * @exception IOException thrown when there's a data error. 
* @draft 2.2 */ - public void read(RuleBasedCollator rbc) throws IOException + protected void read(RuleBasedCollator rbc) throws IOException { readHeader(rbc); readOptions(rbc); @@ -188,7 +190,8 @@ final class CollatorReader for (int i = 0; i < m_contractionCESize_; i ++) { rbc.m_contractionCE_[i] = m_dataInputStream_.readInt(); } - rbc.m_trie_ = new IntTrie(m_dataInputStream_, rbc); + rbc.m_trie_ = new IntTrie(m_dataInputStream_, + RuleBasedCollator.DataManipulate.getInstance()); if (!rbc.m_trie_.isLatin1Linear()) { throw new IOException("Data corrupted, " + "Collator Tries expected to have linear " @@ -213,6 +216,43 @@ final class CollatorReader } } + /** + * Reads in the inverse uca data + * @param input input stream with the inverse uca data + * @return an object containing the inverse uca data + * @exception IOException thrown when error occurs while reading the + * inverse uca + */ + protected static CollationParsedRuleBuilder.InverseUCA readInverseUCA( + InputStream inputStream) + throws IOException + { + ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_, + DATA_FORMAT_VERSION_, UNICODE_VERSION_); + CollationParsedRuleBuilder.InverseUCA result = + new CollationParsedRuleBuilder.InverseUCA(); + DataInputStream input = new DataInputStream(inputStream); + int bytesize = input.readInt(); + int tablesize = input.readInt(); // in int size + int contsize = input.readInt(); // in char size + int table = input.readInt(); // in bytes + int conts = input.readInt(); // in bytes + int size = tablesize * 3; // one column for each strength + result.m_table_ = new int[size]; + result.m_continuations_ = new char[contsize]; + + for (int i = 0; i < size; i ++) { + result.m_table_[i] = input.readInt(); + } + for (int i = 0; i < contsize; i ++) { + result.m_continuations_[i] = input.readChar(); + } + input.close(); + return result; + } + + // private inner class ----------------------------------------------- + // private variables 
------------------------------------------------- /** @@ -231,6 +271,14 @@ final class CollatorReader private static final byte UNICODE_VERSION_[] = {(byte)0x3, (byte)0x0, (byte)0x0, (byte)0x0}; /** + * Inverse UCA file format version and id that this class understands. + * No guarantees are made if a older version is used + */ + private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = {(byte)0x49, + (byte)0x6e, + (byte)0x76, + (byte)0x43}; + /** * Corrupted error string */ private static final String CORRUPTED_DATA_ERROR_ = diff --git a/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java b/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java index 9284ebb030e..fe144ad8fbb 100755 --- a/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java +++ b/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java @@ -5,282 +5,240 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java,v $ -* $Date: 2002/05/22 01:14:38 $ -* $Revision: 1.7 $ +* $Date: 2002/06/21 23:56:47 $ +* $Revision: 1.8 $ * ******************************************************************************* */ package com.ibm.icu.text; import java.io.InputStream; -import java.io.DataInputStream; import java.io.BufferedInputStream; -import java.io.IOException; import java.io.ByteArrayInputStream; import java.util.Locale; import java.util.ResourceBundle; -import java.util.MissingResourceException; import java.text.CharacterIterator; import java.text.StringCharacterIterator; +import java.text.ParseException; import com.ibm.icu.impl.IntTrie; import com.ibm.icu.impl.Trie; import com.ibm.icu.impl.NormalizerImpl; import com.ibm.icu.impl.ICULocaleData; /** -*The RuleBasedCollator class is a concrete subclass of Collator that -* provides a simple, data-driven, table collator. With this class you can -* create a customized table-based Collator. RuleBasedCollator maps characters -* to sort keys.
-*RuleBasedCollator has the following restrictions for efficiency (other -* subclasses may be used for more complex languages) : -*
The collation table is composed of a list of collation rules, where each -* rule is of three forms: -*
-* <modifier> -* <relation> <text-argument> -* <reset> <text-argument> -*-* -*
The definitions of the rule elements is as follows: -*
b c
is treated as bc
.
-* '@' : Indicates that accents are sorted backwards, as in French. -*
'&' : Indicates that the next rule follows the position to where -* the reset text-argument would be sorted. -*
-* This sounds more complicated than it is in practice. For example, the -* following are equivalent ways of expressing the same thing: -*
-*-* Notice that the order is important, as the subsequent item goes immediately -* after the text-argument. The following are not equivalent: -*-* a < b < c -* a < b & b < c -* a < c & a < b -*-*
-*-* Either the text-argument must already be present in the sequence, or some -* initial substring of the text-argument must be present. -* (e.g. "a < b & ae < e" is valid since "a" is present in the -* sequence before "ae" is reset). In this latter case, "ae" is not entered and -* treated as a single character; instead, "e" is sorted as if it were expanded -* to two characters: "a" followed by an "e". This difference appears in -* natural languages: in traditional Spanish "ch" is treated as though it -* contracts to a single character (expressed as "c < ch < d"), while in -* traditional German a-umlaut is treated as though it expanded to two -* characters (expressed as -* "a,A < b,B ... &ae;\u00e3&AE;\u00c3"). -* [\u00e3 and \u00c3 are, of course, the escape sequences for -* a-umlaut.] -* -*-* a < b & a < c -* a < c & a < b -*-*
-* Ignorable Characters -*
-* For ignorable characters, the first rule must start with a relation (the -* examples we have used above are really fragments; "a < b" really should -* be "< a < b"). If, however, the first relation is not "<", then all -* the all text-arguments up to the first "<" are ignorable. For example, -* ", - < a < b" makes "-" an ignorable character, as we saw earlier in -* the word "black-birds". In the samples for different languages, you see that -* most accents are ignorable.
-*Normalization and Accents -*
RuleBasedCollator
automatically processes its rule table to
-* include both pre-composed and combining-character versions of accented
-* characters. Even if the provided rule string contains only base characters
-* and separate combining accent characters, the pre-composed accented
-* characters matching all canonical combinations of characters from the rule
-* string will be entered in the table.
This allows you to use a RuleBasedCollator to compare accented strings -* even when the collator is set to NO_DECOMPOSITION. There are two caveats, -* however. First, if the strings to be collated contain combining sequences -* that may not be in canonical order, you should set the collator to -* CANONICAL_DECOMPOSITION or FULL_DECOMPOSITION to enable sorting of combining -* sequences. Second, if the strings contain characters with compatibility -* decompositions (such as full-width and half-width forms), you must use -* FULL_DECOMPOSITION, since the rule tables only include canonical mappings. -*
-*Errors
-*The following are errors:
-*If you produce one of these errors, a RuleBasedCollator
-* throws a ParseException
.
Examples
-*Simple: "< a < b < c < d"
-*Norwegian: "< a,A< b,B< c,C< d,D< e,E< f,F< " + -* "g,G< h,H< i,I< j,J< k,K< l,L< m,M< " + -* "n,N< o,O< p,P< q,Q< r,R< s,S< t,T< " + -* "u,U< v,V< w,W< x,X< y,Y< z,Z< " + -* "\u00E5=a\ u030A,\u00C5=A\u030A;aa,AA< " + -* "\u00E6,\ u00C6< \u00F8,\u00D8"
-*Normally, to create a rule-based Collator object, you will use
-* Collator
's factory method getInstance
. However, to
-* create a rule-based Collator object with specialized rules tailored to your
-* needs, you construct the RuleBasedCollator
with the rules
-* contained in a String
object. For example:
-*-* Or: -*-* String Simple = "< a< b< c< d"; -* RuleBasedCollator mySimple = new RuleBasedCollator(Simple); -*-*
-*-*-* String Norwegian = "< a,A< b,B< c,C< d,D< e,E< f,F<" + -* "g,G< h,H< i,I< j,J < k,K< l,L< " + -* "m,M< n,N< o,O< p,P< q,Q< r,R< " + -* "s,S< t,T < u,U< v,V< w,W< x,X< " + -* "y,Y< z,Z < \u00E5=a\u030A," + -* "\u00C5=A\u030A;aa,AA< \u00E6," + -* "\u00C6< \u00F8,\u00D8"; -* RuleBasedCollator myNorwegian = new RuleBasedCollator(Norwegian); -*-*
Combining Collator
s is as simple as concatenating strings.
-* Here's an example that combines two Collator
s from two
-* different locales:
-*-*-* // Create an en_US Collator object -* RuleBasedCollator en_USCollator = (RuleBasedCollator) -* Collator.getInstance(new Locale("en", "US", "")); -* // Create a da_DK Collator object -* RuleBasedCollator da_DKCollator = (RuleBasedCollator) -* Collator.getInstance(new Locale("da", "DK", "")); -* // Combine the two -* // First, get the collation rules from en_USCollator -* String en_USRules = en_USCollator.getRules(); -* // Second, get the collation rules from da_DKCollator -* String da_DKRules = da_DKCollator.getRules(); -* RuleBasedCollator newCollator = -* new RuleBasedCollator(en_USRules + da_DKRules); -* // newCollator has the combined rules -*-*
Another more interesting example would be to make changes on an existing
-* table to create a new Collator
object. For example, add
-* "&C< ch, cH, Ch, CH" to the en_USCollator
object to
-* create your own:
-*-*-* // Create a new Collator object with additional rules -* String addRules = "&C< ch, cH, Ch, CH"; -* RuleBasedCollator myCollator = -* new RuleBasedCollator(en_USCollator + addRules); -* // myCollator contains the new rules -*-*
The following example demonstrates how to change the order of -* non-spacing accents, -*
-*-*-* // old rule -* String oldRules = -* "=\u0301;\u0300;\u0302;\u0308" // main accents -* + ";\u0327;\u0303;\u0304;\u0305" // main accents -* + ";\u0306;\u0307;\u0309;\u030A" // main accents -* + ";\u030B;\u030C;\u030D;\u030E" // main accents -* + ";\u030F;\u0310;\u0311;\u0312" // main accents -* + "< a , A ; ae, AE ; \u00e6 , \u00c6" -* + "< b , B < c, C < e, E & C < d, D"; -* // change the order of accent characters -* String addOn = "& \u0300 ; \u0308 ; \u0302"; -* RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn); -*-*
The last example shows how to put new primary ordering in before the
-* default setting. For example, in Japanese Collator
, you
-* can either sort English characters before or after Japanese characters,
-*
-*-* // get en_US Collator rules -* RuleBasedCollator en_USCollator = (RuleBasedCollator) -* Collator.getInstance(Locale.US); -* // add a few Japanese character to sort before English characters -* // suppose the last character before the first base letter 'a' in -* // the English collation rule is \u2212 -* String jaString = "& \u2212 < \u3041, \u3042 < \u3043, \u3044"; -* RuleBasedCollator myJapaneseCollator = new -* RuleBasedCollator(en_USCollator.getRules() + jaString); -*-* @author Syn Wee Quek -* @since release 2.2, April 18 2002 -* @draft 2.2 -*/ -public class RuleBasedCollator extends Collator implements Trie.DataManipulate -{ + *+ * The RuleBasedCollator class is a concrete subclass of Collator. It allows + * customization of the Collator via user specified rule sets. + * RuleBasedCollator is designed to be fully compliant to the + * + * Unicode Collation Algorithm (UCA) and conforms to ISO 14651. + *
+ *+ * Users are strongly encouraged to read + * + * the users guide for more information about the collation service before + * using this class. + *
+ * <p>
+ * Create a RuleBasedCollator from a locale by calling the getInstance(Locale)
+ * factory method in the base class Collator.
+ * Collator.getInstance(Locale) creates a RuleBasedCollator object based on the
+ * collation rules defined by the argument locale.
+ * If customized collation ordering or attributes are required, use the
+ * RuleBasedCollator(String) constructor with the appropriate rules. The
+ * customized RuleBasedCollator will base its ordering on UCA, while
+ * re-adjusting the attributes and orders of the characters in the specified
+ * rule accordingly.
+ * </p>
+ * <p>
+ * RuleBasedCollator provides correct collation orders for most locales
+ * supported in ICU. If specific data for a locale is not available, the orders
+ * eventually fall back to the UCA collation order.
+ * </p>
+ *+ * For information about the collation rule syntax to use and details about + * customization, please refer to the + * + * Collation customization section of the users guide. + *
+ *+ * Note that there are some differences between the Collation rule syntax + * used in Java and ICU4J + *
+ *
+ * + *- According to the JDK documentation: + * + *
+ * Modifier '!' : Turns on Thai/Lao vowel-consonant swapping. If this rule + * is in force when a Thai vowel of the range \U0E40-\U0E44 precedes a + * Thai consonant of the range \U0E01-\U0E2E OR a Lao vowel of the + * range \U0EC0-\U0EC4 precedes a Lao consonant of the range + * \U0E81-\U0EAE then the + * vowel is placed after the consonant for collation purposes. + *
+ *+ * If a rule is without the modifier '!', the Thai/Lao vowel-consonant + * swapping is not turned on. + *
+ * + *+ * ICU4J's RuleBasedCollator does not support turning off the Thai/Lao + * vowel-consonant swapping, since the UCA clearly states that it has to be + * supported to ensure a correct sorting order. If a '!' is encountered, it is + * ignored. + *
+ *- According to the JDK documentation: + * + *
+ * If, however, the first relation is not "<", then all the all + * text-arguments up to the first "<" are ignorable. For example, + * ", - < a < b" makes "-" an ignorable character, as we saw earlier in + * the word "black-birds". + *
+ * + *+ * The above allows random characters before the first '<' not in any + * specific sequence to be ignorable. ICU4J does not support this feature. + * To define ignorable characters in PRIMARY to TERTIARY strength, users can + * use the rule "& X < [variable top]" to set the variable top to the + * PRIMARY strength of "X". Once alternate handling is set to shifted + * (setAlternateHandling(true)), the Collator using strengths PRIMARY, + * SECONDARY or TERTIARY will ignore all code points with PRIMARY strengths + * less than variable top. + * See the user guide's section on + * + * Collation Customization for details. + *
+ *- As mentioned in the documentation of the base class Collator, + * compatibility decomposition mode is not supported. + *
+ * Examples + *
+ *+ * Creating Customized RuleBasedCollators + *
+ *+ * Concatenating rules to combining+ * String Simple = "& a < b < c < d"; + * RuleBasedCollator mySimple = new RuleBasedCollator(Simple); + * + * String Norwegian = "& a , A < b , B < c , C < d , D < e , E " + * + "< f , F < g , G < h , H < i , I < j , " + * + "J < k , K < l , L < m , M < n , N < " + * + "o , O < p , P < q , Q < r , R < s , S < " + * + "t , T < u , U < v , V < w , W < x , X " + * + "< y , Y < z , Z < \u00E5 = a\u030A " + * + ", \u00C5 = A\u030A ; aa , AA < \u00E6 " + * + ", \u00C6 < \u00F8 , \u00D8"; + * RuleBasedCollator myNorwegian = new RuleBasedCollator(Norwegian); + *+ *Collator
s + *+ *+ * Making changes on an existing RuleBasedCollator to create a new + *+ * // Create an en_US Collator object + * RuleBasedCollator en_USCollator = (RuleBasedCollator) + * Collator.getInstance(new Locale("en", "US", "")); + * // Create a da_DK Collator object + * RuleBasedCollator da_DKCollator = (RuleBasedCollator) + * Collator.getInstance(new Locale("da", "DK", "")); + * // Combine the two + * // First, get the collation rules from en_USCollator + * String en_USRules = en_USCollator.getRules(); + * // Second, get the collation rules from da_DKCollator + * String da_DKRules = da_DKCollator.getRules(); + * RuleBasedCollator newCollator = + * new RuleBasedCollator(en_USRules + da_DKRules); + * // newCollator has the combined rules + *+ *Collator
object, by appending the existing rule with the + * changes. + *+ *+ * The following example demonstrates how to change the order of + * non-spacing accents, + *+ * // Create a new Collator object with additional rules + * String addRules = "& C < ch, cH, Ch, CH"; + * RuleBasedCollator myCollator = + * new RuleBasedCollator(en_USCollator + addRules); + * // myCollator contains the new rules + *+ *+ *+ * Putting new primary ordering in before the default setting, + * e.g. Sort English characters before or after Japanese characters in Japanese + *+ * // old rule with main accents + * String oldRules = "= \u0301 ; \u0300 ; \u0302 ; \u0308 " + * + "; \u0327 ; \u0303 ; \u0304 ; \u0305 " + * + "; \u0306 ; \u0307 ; \u0309 ; \u030A " + * + "; \u030B ; \u030C ; \u030D ; \u030E " + * + "; \u030F ; \u0310 ; \u0311 ; \u0312 " + * + "< a , A ; ae, AE ; \u00e6 , \u00c6 " + * + "< b , B < c, C < e, E & C < d , D"; + * // change the order of accent characters + * String addOn = "& \u0300 ; \u0308 ; \u0302"; + * RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn); + *+ *Collator
. + *+ *+ * + * @author Syn Wee Quek + * @since release 2.2, April 18 2002 + * @draft 2.2 + */ +public final class RuleBasedCollator extends Collator +{ // public data members --------------------------------------------------- // public constructors --------------------------------------------------- /** - *+ * // get en_US Collator rules + * RuleBasedCollator en_USCollator + * = (RuleBasedCollator)Collator.getInstance(Locale.US); + * // add a few Japanese character to sort before English characters + * // suppose the last character before the first base letter 'a' in + * // the English collation rule is \u2212 + * String jaString = "& \u2212 < \u3041, \u3042 < \u3043, " + * + "\u3044"; + * RuleBasedCollator myJapaneseCollator + * = new RuleBasedCollator(en_USCollator.getRules() + jaString); + *+ *RuleBasedCollator constructor that takes the rules. - * Please see RuleBasedCollator class description for more details on the - * collation rule syntax.
- *Note different from Java, does not throw a ParseException
- * @see java.util.Locale + *+ * RuleBasedCollator constructor that takes the argument rules for + * customization. RuleBasedCollator constructed will be based on UCA, + * with the attributes and re-ordering of the characters specified in the + * argument rules. + *
+ *See the user guide's section on + * + * Collation Customization for details on the rule syntax. + *
* @param rules the collation rules to build the collation table from. - * @exception Exception thrown when there's an error creating the collator + * @exception ParseException and IOException thrown. ParseException thrown + * when argument rules have an invalid syntax. IOException + * thrown when an error occured while reading internal data. * @draft 2.2 */ public RuleBasedCollator(String rules) throws Exception { - setStrength(Collator.TERTIARY); - setDecomposition(Collator.CANONICAL_DECOMPOSITION); + if (rules == null) { + throw new IllegalArgumentException( + "Collation rules can not be null"); + } + setWithUCAData(); + CollationParsedRuleBuilder builder + = new CollationParsedRuleBuilder(rules); + + builder.setRules(this); m_rules_ = rules; - // tables = new RBCollationTables(rules, decomp); init(); } @@ -313,36 +271,55 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate // public setters -------------------------------------------------------- /** - * Sets the Hiragana Quartenary sort to be on or off - * @param flag true if Hiragana Quartenary sort is to be on, false + * Sets the Hiragana Quaternary mode to be on or off. + * When the Hiragana Quaternary mode turned on, the RuleBasedCollator + * positions Hiragana characters before all non-ignorable characters in + * QUATERNARY strength. This is to produce a correct JIS collation order, + * distinguishing between Katakana and Hiragana characters. + * @param flag true if Hiragana Quaternary mode is to be on, false * otherwise + * @see #setHiraganaQuaternaryDefault + * @see #isHiraganaQuaternary * @draft 2.2 */ - public void setHiraganaQuartenary(boolean flag) + public void setHiraganaQuaternary(boolean flag) { m_isHiragana4_ = flag; } /** - * Sets the Hiragana Quartenary sort to be on or off depending on the - * Collator's locale specific default value. + * Sets the Hiragana Quaternary mode to the initial mode set during + * construction of the RuleBasedCollator. 
+ * See setHiraganaQuaternary(boolean) for more details. + * @see #setHiraganaQuaternary(boolean) + * @see #isHiraganaQuaternary * @draft 2.2 */ - public void setHiraganaQuartenaryDefault() + public void setHiraganaQuaternaryDefault() { m_isHiragana4_ = m_defaultIsHiragana4_; } /** - * Sets the Collator to sort with the indicated casing first - * @param upper true for sorting uppercased characters before lowercased - * characters, false for sorting lowercased characters before - * uppercased characters + * Sets the orders of upper cased characters to sort before lower cased + * characters or vice versa, in strength TERTIARY. The default + * mode is false, and that sorts lower cased characters before upper cased + * characters. + * If true is set, the RuleBasedCollator will sort upper cased characters + * before the lower cased ones. + * @param upperfirst true for sorting upper cased characters before + * lower cased characters, false for sorting lower cased + * characters before upper cased characters + * @see #setCaseFirstOff + * @see #isCaseFirstOff + * @see #isLowerCaseFirst + * @see #isUpperCaseFirst + * @see #setCaseFirstDefault * @draft 2.2 */ - public void setCaseFirst(boolean upper) + public void setCaseFirst(boolean upperfirst) { - if (upper) { + if (upperfirst) { m_caseFirst_ = AttributeValue.UPPER_FIRST_; } else { @@ -355,6 +332,11 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * Sets the Collator to ignore any previous setCaseFirst(boolean) calls. * Ignores case preferences. * @draft 2.2 + * @see #setCaseFirst(boolean) + * @see #isCaseFirstOff + * @see #isLowerCaseFirst + * @see #isUpperCaseFirst + * @see #setCaseFirstDefault */ public void setCaseFirstOff() { @@ -363,10 +345,13 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate } /** - * Sets the case sorting preferences to the Collator's locale specific - * default value. 
- * @see #setCaseFirst(boolean) + * Sets the case first mode to the initial mode set during + * construction of the RuleBasedCollator. + * See setCaseFirst(boolean) for more details. * @see #setCaseFirstOff + * @see #isCaseFirstOff + * @see #isUpperCaseFirst + * @see #setCaseFirst * @draft 2.2 */ public final void setCaseFirstDefault() @@ -375,10 +360,12 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate updateInternalState(); } - /** - * Sets the alternate handling value for quartenary strength to the - * Collator's locale specific default value. - * @see #setAlternateHandling + /** + * Sets the alternate handling mode to the initial mode set during + * construction of the RuleBasedCollator. + * See setAlternateHandling(boolean) for more details. + * @see #setAlternateHandling(boolean) + * @see #isAlternateHandling(boolean) * @draft 2.2 */ public void setAlternateHandlingDefault() @@ -388,8 +375,11 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate } /** - * Sets case level sorting to the Collator's locale specific default value. - * @see #setCaseLevel + * Sets the case level mode to the initial mode set during + * construction of the RuleBasedCollator. + * See setCaseLevel(boolean) for more details. + * @see #setCaseLevel(boolean) + * @see #isCaseLevel * @draft 2.2 */ public void setCaseLevelDefault() @@ -399,19 +389,24 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate } /** - * Set the decomposition mode to the Collator's locale specific default - * value. + * Sets the decomposition mode to the initial mode set during construction + * of the RuleBasedCollator. + * See setDecomposition(int) for more details. 
* @see #getDecomposition + * @see #setDecomposition(int) * @draft 2.2 */ public void setDecompositionDefault() { - m_decomposition_ = m_defaultDecomposition_; + setDecomposition(m_defaultDecomposition_); } /** - * Sets French collation to the Collator's locale specific default value. - * @see #getFrenchCollation + * Sets the French collation mode to the initial mode set during + * construction of the RuleBasedCollator. + * See setFrenchCollation(boolean) for more details. + * @see #isFrenchCollation + * @see #setFrenchCollation(boolean) * @draft 2.2 */ public void setFrenchCollationDefault() @@ -421,20 +416,31 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate } /** - *Sets strength to the Collator's locale specific default value.
- * @see #setStrength + * Sets the collation strength to the initial mode set during the + * construction of the RuleBasedCollator. + * See setStrength(int) for more details. + * @see #setStrength(int) + * @see #getStrength * @draft 2.2 */ public void setStrengthDefault() { - m_strength_ = m_defaultStrength_; - updateInternalState(); + setStrength(m_defaultStrength_); } /** - * Sets the French collation + * Sets the mode for the direction of SECONDARY weights to be used in + * French collation. + * The default value is false which treats SECONDARY weights in the order + * they appear. + * If true is set, the SECONDARY weights will be sorted backwards. + * See the section on + * + * French collation for more information. * @param flag true to set the French collation on, false to set it off * @draft 2.2 + * @see #isFrenchCollation + * @see #setFrenchCollationDefault */ public void setFrenchCollation(boolean flag) { @@ -443,11 +449,24 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate } /** - * Sets the alternate handling for quartenary strength to be either - * shifted or non-ignorable. This attribute will only be effective with - * a quartenary strength sort. - * @param shifted true if shifted for alternate handling is desired, false - * for the non-ignorable. + * Sets the alternate handling for Quaternary strength to be either + * shifted or non-ignorable. + * See the UCA definition on + * + * Alternate Weighting. + * This attribute will only be effective when QUATERNARY strength is set. + * The default value for this mode is false, corresponding to the + * NON_IGNORABLE mode in UCA. In the NON-IGNORABLE mode, the + * RuleBasedCollator will treats all the codepoints with non-ignorable + * primary weights in the same way. 
+ * If the mode is set to true, the behaviour corresponds to SHIFTED defined + * in UCA, this causes codepoints with PRIMARY orders that are equal or + * below the variable top value to be ignored in PRIMARY order and + * moved to the QUATERNARY order. + * @param shifted true if SHIFTED behaviour for alternate handling is + * desired, false for the NON_IGNORABLE behaviour. + * @see #isAlternateHandling(boolean) + * @see #setAlternateHandlingDefault * @draft 2.2 */ public void setAlternateHandling(boolean shifted) @@ -457,9 +476,28 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate } /** - * Sets if case level sorting is required. + *+ * When case level is set to true, an additional weight is formed + * between the SECONDARY and TERTIARY weight, known as the case level. + * The case level is used to distinguish large and small Japanese Kana + * characters. Case level could also be used in other situations. + * For example to distinguish certain Pinyin characters. + * The default value is false, where the case level is not generated. + * If the case level is set to true, which causes the case level to be + * generated. Contents of the case level are affected by the case first + * mode. A simple way to ignore accent differences in a string is to set + * the strength to PRIMARY and enable case level. + *
+ *+ * See the section on + * + * case level for more information. + *
* @param flag true if case level sorting is required, false otherwise * @draft 2.2 + * @see #setCaseLevelDefault + * @see #isCaseLevel + * @see #setCaseFirst(boolean) */ public void setCaseLevel(boolean flag) { @@ -468,12 +506,15 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate } /** - *Sets this Collator's strength property. The strength property + *
+ * Sets this Collator's strength property. The strength property * determines the minimum level of difference considered significant - * during comparison.
+ * during comparison. + * *See the Collator class description for an example of use.
* @param the new strength value. * @see #getStrength + * @see #setStrengthDefault * @see #PRIMARY * @see #SECONDARY * @see #TERTIARY @@ -484,58 +525,44 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * @draft 2.2 */ public void setStrength(int newStrength) { - if ((newStrength != PRIMARY) && - (newStrength != SECONDARY) && - (newStrength != TERTIARY) && - (newStrength != QUATERNARY) && - (newStrength != IDENTICAL)) { - throw new IllegalArgumentException("Incorrect comparison level."); - } - m_strength_ = newStrength; + super.setStrength(newStrength); updateInternalState(); } // public getters -------------------------------------------------------- /** - * Internal method called to parse a lead surrogate's ce for the offset - * to the next trail surrogate data. - * @param ce collation element of the lead surrogate - * @return data offset or 0 for the next trail surrogate + * Gets the collation rules for this RuleBasedCollator. + * @return returns the collation rules * @draft 2.2 */ - public int getFoldingOffset(int ce) - { - if (isSpecial(ce) && getTag(ce) == CE_SURROGATE_TAG_) { - return (ce & 0xFFFFFF); - } - return 0; - } - - /** - * Gets the collation rules for this RuleBasedCollator. * @return returns the collation rules - * @draft 2.2 - */ - public final String getRules() + public String getRules() { return m_rules_; } /** - *Transforms the String into a series of bits that can be compared - * bitwise to other CollationKeys. CollationKeys provide better - * performance than Collator.compare() when Strings are involved in - * multiple comparisons.
- *Internally CollationKey stores its data in a null-terminated byte - * array.
- *See the Collator class description for an example using - * CollationKeys.
- * @param source the string to be transformed into a collation key. - * @return the CollationKey for the given String based on this Collator's - * collation rules. If the source String is null, a null - * CollationKey is returned. + *+ * Get a Collation key for the argument String source from this + * RuleBasedCollator. + *
+ *+ * General recommendation:
+ *
+ * If comparison are to be done to the same String multiple times, it would + * be more efficient to generate CollationKeys for the Strings and use + * CollationKey.compareTo(CollationKey) for the comparisons. + * If the each Strings are compared to only once, using the method + * RuleBasedCollator.compare(String, String) will have a better performance. + *+ * See the class documentation for an explanation about CollationKeys. + *
+ * @param source the text String to be transformed into a collation key. + * @return the CollationKey for the given String based on this + * RuleBasedCollator's collation rules. If the source String is + * null, a null CollationKey is returned. * @see CollationKey - * @see compare(String, String) + * @see #compare(String, String) * @draft 2.2 */ public CollationKey getCollationKey(String source) @@ -543,19 +570,20 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate if (source == null) { return null; } + int strength = getStrength(); boolean compare[] = {m_isCaseLevel_, true, - m_strength_ >= SECONDARY, - m_strength_ >= TERTIARY, - m_strength_ >= QUATERNARY, - m_strength_ == IDENTICAL + strength >= SECONDARY, + strength >= TERTIARY, + strength >= QUATERNARY, + strength == IDENTICAL }; byte bytes[][] = {new byte[SORT_BUFFER_INIT_SIZE_CASE_], // case new byte[SORT_BUFFER_INIT_SIZE_1_], // primary new byte[SORT_BUFFER_INIT_SIZE_2_], // secondary new byte[SORT_BUFFER_INIT_SIZE_3_], // tertiary - new byte[SORT_BUFFER_INIT_SIZE_4_] // quartenary + new byte[SORT_BUFFER_INIT_SIZE_4_] // Quaternary }; int bytescount[] = {0, 0, 0, 0, 0}; int count[] = {0, 0, 0, 0, 0}; @@ -573,31 +601,27 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate int bottomCount4 = 0xFF - commonBottom4; // If we need to normalize, we'll do it all at once at the beginning! 
- if ((compare[5] || m_decomposition_ != NO_DECOMPOSITION) - /*&& UNORM_YES != unorm_quickCheck(source, len, normMode, status)*/ - ) { - /* - * len = unorm_internalNormalize(normSource, normSourceLen, - source, len, - normMode, FALSE, - status); - source = normSource;*/ - String norm = source; - getSortKeyBytes(norm, compare, bytes, bytescount, count, - doFrench, hiragana4, commonBottom4, bottomCount4); + if ((compare[5] || getDecomposition() != NO_DECOMPOSITION) + && Normalizer.quickCheck(source, Normalizer.NFD) + != Normalizer.YES) { + source = Normalizer.decompose(source, false); } - else { - getSortKeyBytes(source, compare, bytes, bytescount, count, doFrench, + getSortKeyBytes(source, compare, bytes, bytescount, count, doFrench, hiragana4, commonBottom4, bottomCount4); - } byte sortkey[] = getSortKey(source, compare, bytes, bytescount, count, doFrench, commonBottom4, bottomCount4); return new CollationKey(source, sortkey); } /** - * Checks if uppercase is sorted before lowercase - * @return true if Collator sorts uppercase before lower, false otherwise + * Checks if upper cased character is sorted before lower cased character. + * See setCaseFirst(boolean) for details. + * @see #setCaseFirstOff + * @see #setCaseFirst(boolean) + * @see #isLowerCaseFirst + * @see #setCaseFirstDefault + * @return true if upper cased characters are sorted before lower cased + * characters, false otherwise * @draft 2.2 */ public boolean isUpperCaseFirst() @@ -606,8 +630,14 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate } /** - * Checks if lowercase is sorted before uppercase - * @return true if Collator sorts lowercase before upper, false otherwise + * Checks if lower cased character is sorted before upper cased character. + * See setCaseFirst(boolean) for details. 
+ * @see #setCaseFirstOff + * @see #setCaseFirst(boolean) + * @see #isUpperCaseFirst + * @see #setCaseFirstDefault + * @return true lower cased characters are sorted before upper cased + * characters, false otherwise * @draft 2.2 */ public boolean isLowerCaseFirst() @@ -616,8 +646,16 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate } /** - * Checks if case sorting is off. - * @return true if case sorting is off, false otherwise + * Checks if a previous call to setCaseFirst(boolean) is turned off + * by setCaseFirstOff(). + * See setCaseFirst(boolean) for details. + * @return true if the customized case sorting is turned off, false + * otherwise + * @see #setCaseFirstOff + * @see #setCaseFirst(boolean) + * @see #isUpperCaseFirst + * @see #isLowerCaseFirst + * @see #setCaseFirstDefault * @draft 2.2 */ public boolean isCaseFirstOff() @@ -626,28 +664,33 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate } /** - * Checks if the alternate handling attribute is shifted or non-ignorable. + * Checks if the alternate handling behaviour is the UCA defined SHIFTED or + * NON_IGNORABLE. **
- * @param shifted true if checks are to be done on shifted, false if - * checks are to be done on non-ignorable + * See setAlternateHandling(boolean) for more details. + * @param shifted true if checks are to be done to see if the SHIFTED + * behaviour is on, false if checks are to be done to see if the + * NON_IGNORABLE behaviour is on. * @return true or false - * @draft 2.2 - */ + * @see #setAlternateHandling(boolean) + * @see #setAlternateHandlingDefault + * @draft 2.2 + */ public boolean isAlternateHandling(boolean shifted) { if (shifted) { @@ -657,8 +700,12 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate } /** - * Checks if case level sorting is on - * @return true if case level sorting is on + * Checks if case level is set to true. + * See setCaseLevel(boolean) for details. + * @return the case level mode + * @see #setCaseLevelDefault + * @see #isCaseLevel + * @see #setCaseLevel(boolean) * @draft 2.2 */ public boolean isCaseLevel() @@ -667,72 +714,103 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate } /** - * Checks if French Collation sorting is on - * @return true if French Collation sorting is on + * Checks if French Collation is set to true. + * See setFrenchCollation(boolean) for details. + * @return true if French Collation is set to true, false otherwise + * @see #setFrenchCollation(boolean) + * @see #setFrenchCollationDefault * @draft 2.2 */ public boolean isFrenchCollation() { return m_isFrenchCollation_; } + + /** + * Checks if the Hiragana Quaternary mode is set on. + * See setHiraganaQuaternary(boolean) for more details. 
+ * @return flag true if Hiragana Quaternary mode is on, false otherwise + * @see #setHiraganaQuaternaryDefault + * @see #setHiraganaQuaternary(boolean) + * @draft 2.2 + */ + public boolean isHiraganaQuaternary() + { + return m_isHiragana4_; + } // public other methods ------------------------------------------------- /** - * Compares the equality of two RuleBasedCollators. + * Compares the equality of two RuleBasedCollator objects. + * RuleBasedCollator objects are equivalent if they have the same collation + * rules and the same attributes. * @param obj the RuleBasedCollator to be compared with. - * @return true if this RuleBasedCollator has exactly the same behaviour - * as obj, false otherwise. + * @return true if this RuleBasedCollator has exactly the same + * collation behaviour as obj, false otherwise. * @draft 2.2 */ public boolean equals(Object obj) { - if (obj == null || !super.equals(obj)) { + if (obj == null) { return false; // super does class check } + if (this == obj) { + return true; + } + if (getClass() != obj.getClass()) { + return false; + } RuleBasedCollator other = (RuleBasedCollator)obj; // all other non-transient information is also contained in rules. - return (m_rules_.equals(other.m_rules_)); - } - - /** - * Standard override; no change in semantics. - * @draft 2.2 - */ - public Object clone() { - // synwee todo: do after all implementation done - return null; + return getStrength() == other.getStrength() + && getDecomposition() == other.getDecomposition() + && other.m_caseFirst_ == m_caseFirst_ + && other.m_caseSwitch_ == m_caseSwitch_ + && other.m_isAlternateHandlingShifted_ + == m_isAlternateHandlingShifted_ + && other.m_isCaseLevel_ == m_isCaseLevel_ + && other.m_isFrenchCollation_ == m_isFrenchCollation_ + && other.m_isHiragana4_ == m_isHiragana4_ + && m_rules_.equals(other.m_rules_); } /** - * Generates the hash code for this RuleBasedCollator. + * Generates a unique hash code for this RuleBasedCollator. 
* @return the unique hash code for this Collator * @draft 2.2 */ - public final int hashCode() + public int hashCode() { return getRules().hashCode(); } /** - *- If argument shifted is true and *
*
*- return value is true, then the alternate handling attribute for - * the Collator is shifted. Or + * the Collator is SHIFTED. Or *
- return value is false, then the alternate handling attribute for - * the Collator is not shifted + * the Collator is NON_IGNORABLE *
- If argument shifted is false and *
*
*- return value is true, then the alternate handling attribute for - * the Collator is non-ignorable. Or + * the Collator is NON_IGNORABLE. Or *
- return value is false, then the alternate handling attribute for - * the Collator is not non-ignorable. + * the Collator is SHIFTED. *
Compares the source string to the target string according to the - * collation rules for this Collator. Returns an integer less than, equal - * to or greater than zero depending on whether the source String is less - * than, equal to or greater than the target string. See the Collator - * class description for an example of use.
- *For a one time comparison, this method has the best performance. If - * a given String will be involved in multiple comparisons, - * CollationKey.compareTo() has the best performance. See the Collator - * class description for an example using CollationKeys.
- * @param source the source string. - * @param target the target string. + * Compares the source text String to the target text String according to + * the collation rules, strength and decomposition mode for this + * RuleBasedCollator. + * Returns an integer less than, + * equal to or greater than zero depending on whether the source String is + * less than, equal to or greater than the target String. See the Collator + * class description for an example of use. + * + *+ * General recommendation:
+ * @param source the source text String. + * @param target the target text String. * @return Returns an integer value. Value is less than zero if source is * less than target, value is zero if source and target are equal, * value is greater than zero if source is greater than target. * @see CollationKey - * @see Collator#getCollationKey + * @see #getCollationKey * @draft 2.2 */ - public final int compare(String source, String target) + public int compare(String source, String target) { if (source == target) { return 0; @@ -753,13 +831,14 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate return 1; } + int strength = getStrength(); // setting up the collator parameters boolean compare[] = {m_isCaseLevel_, true, - m_strength_ >= SECONDARY, - m_strength_ >= TERTIARY, - m_strength_ >= QUATERNARY, - m_strength_ == IDENTICAL + strength >= SECONDARY, + strength >= TERTIARY, + strength >= QUATERNARY, + strength == IDENTICAL }; boolean doFrench = m_isFrenchCollation_ && compare[2]; boolean doShift4 = m_isAlternateHandlingShifted_ && compare[4]; @@ -830,15 +909,13 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate } return 0; } - - // public abstract methods ----------------------------------------------- - // protected inner interfaces -------------------------------------------- + // package private inner interfaces -------------------------------------- /** * Attribute values to be used when setting the Collator options - */ - protected static interface AttributeValue + */ + static interface AttributeValue { /** * Indicates that the default attribute value will be used. 
@@ -848,158 +925,250 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate /** * Primary collation strength */ - static final int PRIMARY_ = 0; - /** - * Secondary collation strength - */ - static final int SECONDARY_ = 1; - /** - * Tertiary collation strength - */ - static final int TERTIARY_ = 2; - /** - * Default collation strength - */ - static final int DEFAULT_STRENGTH_ = TERTIARY; - /** - * Internal use for strength checks in Collation elements - */ - static final int CE_STRENGTH_LIMIT_ = TERTIARY + 1; - /** - * Quaternary collation strength - */ - static final int QUATERNARY_ = 3; - /** - * Identical collation strength - */ - static final int IDENTICAL_ = 15; - /** - * Internal use for strength checks - */ - static final int STRENGTH_LIMIT_ = IDENTICAL + 1; - /** - * Turn the feature off - works for FRENCH_COLLATION, CASE_LEVEL, - * HIRAGANA_QUATERNARY_MODE and DECOMPOSITION_MODE - */ - static final int OFF_ = 16; - /** - * Turn the feature on - works for FRENCH_COLLATION, CASE_LEVEL, - * HIRAGANA_QUATERNARY_MODE and DECOMPOSITION_MODE - */ - static final int ON_ = 17; + static final int PRIMARY_ = Collator.PRIMARY; + /** + * Secondary collation strength + */ + static final int SECONDARY_ = Collator.SECONDARY; + /** + * Tertiary collation strength + */ + static final int TERTIARY_ = Collator.TERTIARY; + /** + * Default collation strength + */ + static final int DEFAULT_STRENGTH_ = Collator.TERTIARY; + /** + * Internal use for strength checks in Collation elements + */ + static final int CE_STRENGTH_LIMIT_ = Collator.TERTIARY + 1; + /** + * Quaternary collation strength + */ + static final int QUATERNARY_ = 3; + /** + * Identical collation strength + */ + static final int IDENTICAL_ = Collator.IDENTICAL; + /** + * Internal use for strength checks + */ + static final int STRENGTH_LIMIT_ = Collator.IDENTICAL + 1; + /** + * Turn the feature off - works for FRENCH_COLLATION, CASE_LEVEL, + * HIRAGANA_QUATERNARY_MODE and 
DECOMPOSITION_MODE + */ + static final int OFF_ = 16; + /** + * Turn the feature on - works for FRENCH_COLLATION, CASE_LEVEL, + * HIRAGANA_QUATERNARY_MODE and DECOMPOSITION_MODE + */ + static final int ON_ = 17; /** * Valid for ALTERNATE_HANDLING. Alternate handling will be shifted */ - static final int SHIFTED_ = 20; - /** - * Valid for ALTERNATE_HANDLING. Alternate handling will be non - * ignorable - */ - static final int NON_IGNORABLE_ = 21; - /** - * Valid for CASE_FIRST - lower case sorts before upper case - */ - static final int LOWER_FIRST_ = 24; - /** - * Upper case sorts before lower case - */ - static final int UPPER_FIRST_ = 25; - /** - * Valid for NORMALIZATION_MODE ON and OFF are also allowed for this - * attribute - */ - static final int ON_WITHOUT_HANGUL_ = 28; - /** - * Number of attribute values - */ - static final int LIMIT_ = 29; - } - - /** - * Attributes that collation service understands. All the attributes can - * take DEFAULT value, as well as the values specific to each one. - */ - protected static interface Attribute { - /** - * Attribute for direction of secondary weights - used in French. - * Acceptable values are ON, which results in secondary weights being - * considered backwards and OFF which treats secondary weights in the - * order they appear. - */ - static final int FRENCH_COLLATION_ = 0; - /** - * Attribute for handling variable elements. Acceptable values are - * NON_IGNORABLE (default) which treats all the codepoints with - * non-ignorable primary weights in the same way, and SHIFTED which - * causes codepoints with primary weights that are equal or below the - * variable top value to be ignored on primary level and moved to the - * quaternary level. - */ - static final int ALTERNATE_HANDLING_ = 1; - /** - * Controls the ordering of upper and lower case letters. 
Acceptable - * values are OFF (default), which orders upper and lower case letters - * in accordance to their tertiary weights, UPPER_FIRST which forces - * upper case letters to sort before lower case letters, and - * LOWER_FIRST which does the opposite. - */ - static final int CASE_FIRST_ = 2; - /** - * Controls whether an extra case level (positioned before the third - * level) is generated or not. Acceptable values are OFF (default), - * when case level is not generated, and ON which causes the case - * level to be generated. Contents of the case level are affected by - * the value of CASE_FIRST attribute. A simple way to ignore accent - * differences in a string is to set the strength to PRIMARY and - * enable case level. - */ - static final int CASE_LEVEL_ = 3; - /** - * Controls whether the normalization check and necessary - * normalizations are performed. When set to OFF (default) no - * normalization check is performed. The correctness of the result is - * guaranteed only if the input data is in so-called FCD form (see - * users manual for more info). When set to ON, an incremental check - * is performed to see whether the input data is in the FCD form. If - * the data is not in the FCD form, incremental NFD normalization is - * performed. - */ - static final int NORMALIZATION_MODE_ = 4; - /** - * The strength attribute. Can be either PRIMARY, SECONDARY, TERTIARY, - * QUATERNARY or IDENTICAL. The usual strength for most locales - * (except Japanese) is tertiary. Quaternary strength is useful when - * combined with shifted setting for alternate handling attribute and - * for JIS x 4061 collation, when it is used to distinguish between - * Katakana and Hiragana (this is achieved by setting the - * HIRAGANA_QUATERNARY mode to on. Otherwise, quaternary level is - * affected only by the number of non ignorable code points in the - * string. Identical strength is rarely useful, as it amounts to - * codepoints of the NFD form of the string. 
- */ - static final int STRENGTH_ = 5; - /** - * When turned on, this attribute positions Hiragana before all - * non-ignorables on quaternary level. This is a sneaky way to produce - * JIS sort order. - */ - static final int HIRAGANA_QUATERNARY_MODE_ = 6; - /** - * Attribute count - */ - static final int LIMIT_ = 7; - } + static final int SHIFTED_ = 20; + /** + * Valid for ALTERNATE_HANDLING. Alternate handling will be non + * ignorable + */ + static final int NON_IGNORABLE_ = 21; + /** + * Valid for CASE_FIRST - lower case sorts before upper case + */ + static final int LOWER_FIRST_ = 24; + /** + * Upper case sorts before lower case + */ + static final int UPPER_FIRST_ = 25; + /** + * Valid for NORMALIZATION_MODE ON and OFF are also allowed for this + * attribute + */ + static final int ON_WITHOUT_HANGUL_ = 28; + /** + * Number of attribute values + */ + static final int LIMIT_ = 29; + }; + + /** + * Attributes that collation service understands. All the attributes can + * take DEFAULT value, as well as the values specific to each one. + */ + static interface Attribute + { + /** + * Attribute for direction of secondary weights - used in French. + * Acceptable values are ON, which results in secondary weights being + * considered backwards and OFF which treats secondary weights in the + * order they appear. + */ + static final int FRENCH_COLLATION_ = 0; + /** + * Attribute for handling variable elements. Acceptable values are + * NON_IGNORABLE (default) which treats all the codepoints with + * non-ignorable primary weights in the same way, and SHIFTED which + * causes codepoints with primary weights that are equal or below the + * variable top value to be ignored on primary level and moved to the + * quaternary level. + */ + static final int ALTERNATE_HANDLING_ = 1; + /** + * Controls the ordering of upper and lower case letters. 
Acceptable + * values are OFF (default), which orders upper and lower case letters + * in accordance to their tertiary weights, UPPER_FIRST which forces + * upper case letters to sort before lower case letters, and + * LOWER_FIRST which does the opposite. + */ + static final int CASE_FIRST_ = 2; + /** + * Controls whether an extra case level (positioned before the third + * level) is generated or not. Acceptable values are OFF (default), + * when case level is not generated, and ON which causes the case + * level to be generated. Contents of the case level are affected by + * the value of CASE_FIRST attribute. A simple way to ignore accent + * differences in a string is to set the strength to PRIMARY and + * enable case level. + */ + static final int CASE_LEVEL_ = 3; + /** + * Controls whether the normalization check and necessary + * normalizations are performed. When set to OFF (default) no + * normalization check is performed. The correctness of the result is + * guaranteed only if the input data is in so-called FCD form (see + * users manual for more info). When set to ON, an incremental check + * is performed to see whether the input data is in the FCD form. If + * the data is not in the FCD form, incremental NFD normalization is + * performed. + */ + static final int NORMALIZATION_MODE_ = 4; + /** + * The strength attribute. Can be either PRIMARY, SECONDARY, TERTIARY, + * QUATERNARY or IDENTICAL. The usual strength for most locales + * (except Japanese) is tertiary. Quaternary strength is useful when + * combined with shifted setting for alternate handling attribute and + * for JIS x 4061 collation, when it is used to distinguish between + * Katakana and Hiragana (this is achieved by setting the + * HIRAGANA_QUATERNARY mode to on. Otherwise, quaternary level is + * affected only by the number of non ignorable code points in the + * string. Identical strength is rarely useful, as it amounts to + * codepoints of the NFD form of the string. 
+ */ + static final int STRENGTH_ = 5; + /** + * When turned on, this attribute positions Hiragana before all + * non-ignorables on quaternary level. This is a sneaky way to produce + * JIS sort order. + */ + static final int HIRAGANA_QUATERNARY_MODE_ = 6; + /** + * Attribute count + */ + static final int LIMIT_ = 7; + }; - // protected data members ------------------------------------------------ - /** + * DataManipulate singleton + */ + static class DataManipulate implements Trie.DataManipulate + { + // public methods ---------------------------------------------------- + + /** + * Internal method called to parse a lead surrogate's ce for the offset + * to the next trail surrogate data. + * @param ce collation element of the lead surrogate + * @return data offset or 0 for the next trail surrogate + * @draft 2.2 + */ + public final int getFoldingOffset(int ce) + { + if (isSpecial(ce) && getTag(ce) == CE_SURROGATE_TAG_) { + return (ce & 0xFFFFFF); + } + return 0; + } + + /** + * Get singleton object + */ + public static final DataManipulate getInstance() + { + if (m_instance_ == null) { + m_instance_ = new DataManipulate(); + } + return m_instance_; + } + + // private data member ---------------------------------------------- + + /** + * Singleton instance + */ + private static DataManipulate m_instance_; + + // private constructor ---------------------------------------------- + + /** + * private to prevent initialization + */ + private DataManipulate() + { + } + }; + + // package private data member ------------------------------------------- + + static final byte BYTE_FIRST_TAILORED_ = (byte)0x04; + static final byte BYTE_COMMON_ = (byte)0x05; + static final int COMMON_TOP_2_ = 0x86; // int for unsigness + static final int COMMON_BOTTOM_2_ = BYTE_COMMON_; + /** + * Case strength mask + */ + static final int CE_CASE_BIT_MASK_ = 0xC0; + static final int CE_TAG_SHIFT_ = 24; + static final int CE_TAG_MASK_ = 0x0F000000; + + static final int CE_SPECIAL_FLAG_ = 
0xF0000000; + /** + * Lead surrogate that is tailored and doesn't start a contraction + */ + static final int CE_SURROGATE_TAG_ = 5; + /** + * Mask to get the primary strength of the collation element + */ + static final int CE_PRIMARY_MASK_ = 0xFFFF0000; + /** + * Mask to get the secondary strength of the collation element + */ + static final int CE_SECONDARY_MASK_ = 0xFF00; + /** + * Mask to get the tertiary strength of the collation element + */ + static final int CE_TERTIARY_MASK_ = 0xFF; + /** + * Primary strength shift + */ + static final int CE_PRIMARY_SHIFT_ = 16; + /** + * Secondary strength shift + */ + static final int CE_SECONDARY_SHIFT_ = 8; + /** + * Continuation marker + */ + static final int CE_CONTINUATION_MARKER_ = 0xC0; + + /** * Size of collator raw data headers and options before the expansion * data. This is used when expansion ces are to be retrieved. ICU4C uses * the expansion offset starting from UCollator.UColHeader, hence ICU4J * will have to minus that off to get the right expansion ce offset. In * number of ints. */ - protected int m_expansionOffset_; + int m_expansionOffset_; /** * Size of collator raw data headers, options and expansions before * contraction data. This is used when contraction ces are to be retrieved. @@ -1007,63 +1176,63 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * ICU4J will have to minus that off to get the right contraction ce * offset. In number of chars. 
*/ - protected int m_contractionOffset_; + int m_contractionOffset_; /** * Flag indicator if Jamo is special */ - protected boolean m_isJamoSpecial_; + boolean m_isJamoSpecial_; // Collator options ------------------------------------------------------ - protected int m_defaultVariableTopValue_; - protected boolean m_defaultIsFrenchCollation_; - protected boolean m_defaultIsAlternateHandlingShifted_; - protected int m_defaultCaseFirst_; - protected boolean m_defaultIsCaseLevel_; - protected int m_defaultDecomposition_; - protected int m_defaultStrength_; - protected boolean m_defaultIsHiragana4_; + int m_defaultVariableTopValue_; + boolean m_defaultIsFrenchCollation_; + boolean m_defaultIsAlternateHandlingShifted_; + int m_defaultCaseFirst_; + boolean m_defaultIsCaseLevel_; + int m_defaultDecomposition_; + int m_defaultStrength_; + boolean m_defaultIsHiragana4_; /** * Value of the variable top */ - protected int m_variableTopValue_; + int m_variableTopValue_; /** * Attribute for special Hiragana */ - protected boolean m_isHiragana4_; + boolean m_isHiragana4_; /** * Case sorting customization */ - protected int m_caseFirst_; + int m_caseFirst_; // end Collator options -------------------------------------------------- - + /** * Expansion table */ - protected int m_expansion_[]; + int m_expansion_[]; /** * Contraction index table */ - protected char m_contractionIndex_[]; + char m_contractionIndex_[]; /** * Contraction CE table */ - protected int m_contractionCE_[]; + int m_contractionCE_[]; /** * Data trie */ - protected IntTrie m_trie_; + IntTrie m_trie_; /** * Table to store all collation elements that are the last element of an * expansion. This is for use in StringSearch. */ - protected int m_expansionEndCE_[]; + int m_expansionEndCE_[]; /** * Table to store the maximum size of any expansions that end with the * corresponding collation element in m_expansionEndCE_. 
For use in * StringSearch too */ - protected byte m_expansionEndCEMaxSize_[]; + byte m_expansionEndCEMaxSize_[]; /** * Heuristic table to store information on whether a char character is * considered "unsafe". "Unsafe" character are combining marks or those @@ -1072,33 +1241,33 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * unsafe. If we have another contraction "ZA" with the one above, then * 'A', 'B', 'C' are "unsafe" but 'Z' is not. */ - protected byte m_unsafe_[]; + byte m_unsafe_[]; /** * Table to store information on whether a codepoint can occur as the last * character in a contraction */ - protected byte m_contractionEnd_[]; + byte m_contractionEnd_[]; /** - * Table for UCA use, may be removed + * Table for UCA and builder use */ - protected char m_UCAContraction_[]; + char m_UCAContraction_[]; /** * Original collation rules */ - protected String m_rules_; + String m_rules_; /** * The smallest "unsafe" codepoint */ - protected char m_minUnsafe_; + char m_minUnsafe_; /** * The smallest codepoint that could be the end of a contraction */ - protected char m_minContractionEnd_; + char m_minContractionEnd_; /** * UnicodeData.txt property object */ - protected static final RuleBasedCollator UCA_; + static final RuleBasedCollator UCA_; // block to initialise character property database static @@ -1124,99 +1293,12 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate e.printStackTrace(); throw new RuntimeException(e.getMessage()); } - } + } - // protected constants --------------------------------------------------- + // package private constructors ------------------------------------------ - protected static final int CE_SPECIAL_FLAG_ = 0xF0000000; - /** - * Lead surrogate that is tailored and doesn't start a contraction - */ - protected static final int CE_SURROGATE_TAG_ = 5; - - /** - * Minimum size required for the binary collation data in bytes. 
- * Size of UCA header + size of options to 4 bytes - */ - private static final int MIN_BINARY_DATA_SIZE_ = (41 + 8) << 2; - /** - * Mask to get the primary strength of the collation element - */ - protected static final int CE_PRIMARY_MASK_ = 0xFFFF0000; - /** - * Mask to get the secondary strength of the collation element - */ - protected static final int CE_SECONDARY_MASK_ = 0xFF00; - /** - * Mask to get the tertiary strength of the collation element - */ - protected static final int CE_TERTIARY_MASK_ = 0xFF; - /** - * Primary strength shift - */ - protected static final int CE_PRIMARY_SHIFT_ = 16; - /** - * Secondary strength shift - */ - protected static final int CE_SECONDARY_SHIFT_ = 8; - - /** - * Continuation marker - */ - protected static final int CE_CONTINUATION_MARKER_ = 0xC0; - - // end protected constants ----------------------------------------------- - - // protected constructor ------------------------------------------------- - - /** - * Constructors a RuleBasedCollator from the argument locale. - * If no resource bundle is associated with the locale, UCA is used - * instead. 
- * @param locale - * @exception Exception thrown when there's an error creating the Collator - */ - protected RuleBasedCollator(Locale locale) throws Exception - { - ResourceBundle rb = ICULocaleData.getLocaleElements(locale); - - if (rb != null) { - byte map[] = (byte [])rb.getObject("%%CollationBin"); - // synwee todo: problem, data in little endian and - // ICUListResourceBundle should not calculate size by - // using .available() that only gives the buffer size - BufferedInputStream input = - new BufferedInputStream(new ByteArrayInputStream(map)); - CollatorReader reader = new CollatorReader(input, false); - if (map.length > MIN_BINARY_DATA_SIZE_) { - // synwee todo: undo when problem solved - reader.read(this); - } - else { - reader.readHeader(this); - reader.readOptions(this); - // duplicating UCA_'s data - m_expansion_ = UCA_.m_expansion_; - m_contractionIndex_ = UCA_.m_contractionIndex_; - m_contractionCE_ = UCA_.m_contractionCE_; - m_trie_ = UCA_.m_trie_; - m_expansionEndCE_ = UCA_.m_expansionEndCE_; - m_expansionEndCEMaxSize_ = UCA_.m_expansionEndCEMaxSize_; - m_unsafe_ = UCA_.m_unsafe_; - m_contractionEnd_ = UCA_.m_contractionEnd_; - m_minUnsafe_ = UCA_.m_minUnsafe_; - m_minContractionEnd_ = UCA_.m_minContractionEnd_; - } - Object rules = rb.getObject("CollationElements"); - if (rules != null) { - m_rules_ = (String)((Object[][])rules)[0][1]; - } - init(); - } - } - - /** - *
+ * If comparison are to be done to the same String multiple times, it would + * be more efficient to generate CollationKeys for the Strings and use + * CollationKey.compareTo(CollationKey) for the comparisons. + * If the each Strings are compared to only once, using the method + * RuleBasedCollator.compare(String, String) will have a better performance. + *Protected constructor for use by subclasses. + /** + *
Private contructor for use by subclasses. * Public access to creating Collators is handled by the API * Collator.getInstance() or RuleBasedCollator(String rules). *
@@ -1225,41 +1307,68 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * * @draft 2.2 */ - protected RuleBasedCollator() throws Exception + RuleBasedCollator() { } - - // protected methods ----------------------------------------------------- + + // package private methods ----------------------------------------------- /** - * Initializes the RuleBasedCollator + * Sets this collator to use the tables in UCA. Note options not taken + * care of here. */ - protected final void init() + final void setWithUCATables() { - for (m_minUnsafe_ = 0; m_minUnsafe_ < DEFAULT_MIN_HEURISTIC_; - m_minUnsafe_ ++) { - // Find the smallest unsafe char. - if (isUnsafe(m_minUnsafe_)) { - break; - } - } - - for (m_minContractionEnd_ = 0; - m_minContractionEnd_ < DEFAULT_MIN_HEURISTIC_; - m_minContractionEnd_ ++) { - // Find the smallest contraction-ending char. - if (isContractionEnd(m_minContractionEnd_)) { - break; - } - } - m_strength_ = m_defaultStrength_; - m_decomposition_ = m_defaultDecomposition_; - m_isFrenchCollation_ = m_defaultIsFrenchCollation_; - m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_; - m_isCaseLevel_ = m_defaultIsCaseLevel_; - m_caseFirst_ = m_defaultCaseFirst_; - m_isHiragana4_ = m_defaultIsHiragana4_; - updateInternalState(); + m_expansion_ = UCA_.m_expansion_; + m_contractionIndex_ = UCA_.m_contractionIndex_; + m_contractionCE_ = UCA_.m_contractionCE_; + m_trie_ = UCA_.m_trie_; + m_expansionEndCE_ = UCA_.m_expansionEndCE_; + m_expansionEndCEMaxSize_ = UCA_.m_expansionEndCEMaxSize_; + m_unsafe_ = UCA_.m_unsafe_; + m_contractionEnd_ = UCA_.m_contractionEnd_; + m_minUnsafe_ = UCA_.m_minUnsafe_; + m_minContractionEnd_ = UCA_.m_minContractionEnd_; + } + + /** + * Sets this collator to use the all options and tables in UCA. 
+ */ + final void setWithUCAData() + { + m_addition3_ = UCA_.m_addition3_; + m_bottom3_ = UCA_.m_bottom3_; + m_bottomCount3_ = UCA_.m_bottomCount3_; + m_caseFirst_ = UCA_.m_caseFirst_; + m_caseSwitch_ = UCA_.m_caseSwitch_; + m_common3_ = UCA_.m_common3_; + m_contractionOffset_ = UCA_.m_contractionOffset_; + setDecomposition(UCA_.getDecomposition()); + m_defaultCaseFirst_ = UCA_.m_defaultCaseFirst_; + m_defaultDecomposition_ = UCA_.m_defaultDecomposition_; + m_defaultIsAlternateHandlingShifted_ + = UCA_.m_defaultIsAlternateHandlingShifted_; + m_defaultIsCaseLevel_ = UCA_.m_defaultIsCaseLevel_; + m_defaultIsFrenchCollation_ = UCA_.m_defaultIsFrenchCollation_; + m_defaultIsHiragana4_ = UCA_.m_defaultIsHiragana4_; + m_defaultStrength_ = UCA_.m_defaultStrength_; + m_defaultVariableTopValue_ = UCA_.m_defaultVariableTopValue_; + m_expansionOffset_ = UCA_.m_expansionOffset_; + m_isAlternateHandlingShifted_ = UCA_.m_isAlternateHandlingShifted_; + m_isCaseLevel_ = UCA_.m_isCaseLevel_; + m_isFrenchCollation_ = UCA_.m_isFrenchCollation_; + m_isHiragana4_ = UCA_.m_isHiragana4_; + m_isJamoSpecial_ = UCA_.m_isJamoSpecial_; + m_isSimple3_ = UCA_.m_isSimple3_; + m_mask3_ = UCA_.m_mask3_; + m_minContractionEnd_ = UCA_.m_minContractionEnd_; + m_minUnsafe_ = UCA_.m_minUnsafe_; + m_rules_ = UCA_.m_rules_; + setStrength(UCA_.getStrength()); + m_top3_ = UCA_.m_top3_; + m_topCount3_ = UCA_.m_topCount3_; + m_variableTopValue_ = UCA_.m_variableTopValue_; + setWithUCATables(); } /** @@ -1272,7 +1381,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * @param ch character to determin * @return true if ch is unsafe, false otherwise */ - protected final boolean isUnsafe(char ch) + final boolean isUnsafe(char ch) { if (ch < m_minUnsafe_) { return false; @@ -1296,7 +1405,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * otherwise it is not deterministic. 
* @param ch character to be determined */ - protected final boolean isContractionEnd(char ch) + final boolean isContractionEnd(char ch) { if (UTF16.isTrailSurrogate(ch)) { return true; @@ -1315,93 +1424,11 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate } /** - * Resets the internal case data members and compression values. - */ - protected void updateInternalState() - { - if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) { - m_caseSwitch_ = (byte)CASE_SWITCH_; - } - else { - m_caseSwitch_ = NO_CASE_SWITCH_; - } - - if (m_isCaseLevel_ || m_caseFirst_ == AttributeValue.OFF_) { - m_mask3_ = CE_REMOVE_CASE_; - m_common3_ = COMMON_NORMAL_3_; - m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_OFF_; - m_top3_ = COMMON_TOP_CASE_SWITCH_OFF_3_; - m_bottom3_ = COMMON_BOTTOM_3_; - } - else { - m_mask3_ = (byte)CE_KEEP_CASE_; - m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_ON_; - if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) { - m_common3_ = COMMON_UPPER_FIRST_3_; - m_top3_ = COMMON_TOP_CASE_SWITCH_UPPER_3_; - m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_UPPER_3_; - } else { - m_common3_ = COMMON_NORMAL_3_; - m_top3_ = COMMON_TOP_CASE_SWITCH_LOWER_3_; - m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_LOWER_3_; - } - } - - // Set the compression values - int total3 = m_top3_ - COMMON_BOTTOM_3_ - 1; - // we multilply double with int, but need only int - m_topCount3_ = (int)(PROPORTION_3_ * total3); - m_bottomCount3_ = total3 - m_topCount3_; - - if (!m_isCaseLevel_ && m_strength_ == AttributeValue.TERTIARY_ - && !m_isFrenchCollation_ && !m_isAlternateHandlingShifted_) { - m_isSimple3_ = true; - } - else { - m_isSimple3_ = false; - } - } - - /** - *Converts the C attribute index and values for use and stores it into - * the relevant default attribute variable.
- *Note internal use, no sanity checks done on arguments
- */ - protected void setAttributeDefault(int attribute, int value) - { - switch (attribute) { - case Attribute.FRENCH_COLLATION_: - m_defaultIsFrenchCollation_ = (value == AttributeValue.ON_); - break; - case Attribute.ALTERNATE_HANDLING_: - m_defaultIsAlternateHandlingShifted_ = - (value == AttributeValue.SHIFTED_); - break; - case Attribute.CASE_FIRST_: - m_defaultCaseFirst_ = value; - break; - case Attribute.CASE_LEVEL_: - m_defaultIsCaseLevel_ = (value == AttributeValue.ON_); - break; - case Attribute.NORMALIZATION_MODE_: - if (value == AttributeValue.ON_) { - value = Collator.CANONICAL_DECOMPOSITION; - } - m_defaultDecomposition_ = value; - break; - case Attribute.STRENGTH_: - m_defaultStrength_ = value; - case Attribute.HIRAGANA_QUATERNARY_MODE_: - m_defaultIsHiragana4_ = (value == AttributeValue.ON_); - } - } - - /** * Retrieve the tag of a special ce * @param ce ce to test * @return tag of ce */ - protected static int getTag(int ce) + static int getTag(int ce) { return (ce & CE_TAG_MASK_) >> CE_TAG_SHIFT_; } @@ -1411,60 +1438,62 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * @param ce to check * @return true if ce is special */ - protected static boolean isSpecial(int ce) + static boolean isSpecial(int ce) { return (ce & CE_SPECIAL_FLAG_) == CE_SPECIAL_FLAG_; } - - /** - * Getting the mask for collation strength - * @param strength collation strength - * @return collation element mask - */ - protected static final int getMask(int strength) - { - switch (strength) - { - case Collator.PRIMARY: - return CE_PRIMARY_MASK_; - case Collator.SECONDARY: - return CE_SECONDARY_MASK_ | CE_PRIMARY_MASK_; - default: - return CE_TERTIARY_MASK_ | CE_SECONDARY_MASK_ - | CE_PRIMARY_MASK_; - } - } - /** - * Gets the primary weights from a CE - * @param ce collation element - * @return the primary weight of the collation element - */ - protected static final int getPrimaryWeight(int ce) - { - return ((ce) & CE_PRIMARY_MASK_) >> 
CE_PRIMARY_SHIFT_; - } - - /** - * Gets the secondary weights from a CE - * @param ce collation element - * @return the secondary weight of the collation element - */ - protected static final int getSecondaryWeight(int ce) - { - return (ce & CE_SECONDARY_MASK_) >> CE_SECONDARY_SHIFT_; - } - - /** - * Gets the tertiary weights from a CE - * @param ce collation element - * @return the tertiary weight of the collation element - */ - protected static final int getTertiaryWeight(int ce) - { - return ce & CE_TERTIARY_MASK_; - } - + /** + * Checks if the argument ce is a continuation + * @param ce collation element to test + * @return true if ce is a continuation + */ + static final boolean isContinuation(int ce) + { + return ce != CollationElementIterator.NULLORDER + && (ce & CE_CONTINUATION_TAG_) == CE_CONTINUATION_TAG_; + } + + // protected constructor ------------------------------------------------- + + /** + * Constructors a RuleBasedCollator from the argument locale. + * If no resource bundle is associated with the locale, UCA is used + * instead. 
+ * @param locale + * @exception Exception thrown when there's an error creating the Collator + */ + RuleBasedCollator(Locale locale) throws Exception + { + ResourceBundle rb = ICULocaleData.getLocaleElements(locale); + + if (rb != null) { + byte map[] = (byte [])rb.getObject("%%CollationBin"); + BufferedInputStream input = + new BufferedInputStream(new ByteArrayInputStream(map)); + CollatorReader reader = new CollatorReader(input, false); + if (map.length > MIN_BINARY_DATA_SIZE_) { + reader.read(this); + } + else { + reader.readHeader(this); + reader.readOptions(this); + // duplicating UCA_'s data + setWithUCATables(); + } + Object rules = rb.getObject("CollationElements"); + if (rules != null) { + m_rules_ = (String)((Object[][])rules)[0][1]; + } + init(); + } + else { + setWithUCAData(); + } + } + + // private inner classes ------------------------------------------------ + // private variables ----------------------------------------------------- /** @@ -1528,7 +1557,6 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate /** * Case strength mask */ - private static final int CE_CASE_BIT_MASK_ = 0xC0; private static final int CE_CASE_MASK_3_ = 0xFF; /** * Sortkey size factor. Values can be changed. 
@@ -1547,14 +1575,10 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate private static final byte BYTE_SORTKEY_GLUE_ = (byte)0x02; private static final byte BYTE_SHIFT_PREFIX_ = (byte)0x03; private static final byte BYTE_UNSHIFTED_MIN_ = BYTE_SHIFT_PREFIX_; - private static final byte BYTE_FIRST_TAILORED_ = (byte)0x04; - private static final byte BYTE_COMMON_ = (byte)0x05; private static final byte BYTE_FIRST_UCA_ = BYTE_COMMON_; private static final byte BYTE_LAST_LATIN_PRIMARY_ = (byte)0x4C; private static final byte BYTE_FIRST_NON_LATIN_PRIMARY_ = (byte)0x4D; private static final byte BYTE_UNSHIFTED_MAX_ = (byte)0xFF; - private static final int COMMON_BOTTOM_2_ = BYTE_COMMON_; - private static final int COMMON_TOP_2_ = 0x86; // int for unsigness private static final int TOTAL_2_ = COMMON_TOP_2_ - COMMON_BOTTOM_2_ - 1; private static final int FLAG_BIT_MASK_CASE_SWITCH_OFF_ = 0x80; private static final int FLAG_BIT_MASK_CASE_SWITCH_ON_ = 0x40; @@ -1572,6 +1596,12 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate private static final int COMMON_NORMAL_3_ = COMMON_BOTTOM_3_; private static final int COMMON_4_ = (byte)0xFF; + /** + * Minimum size required for the binary collation data in bytes. + * Size of UCA header + size of options to 4 bytes + */ + private static final int MIN_BINARY_DATA_SIZE_ = (41 + 8) << 2; + /** * If this collator is to generate only simple tertiaries for fast path */ @@ -1582,7 +1612,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate */ private boolean m_isFrenchCollation_; /** - * Flag indicating if shifted is requested for quartenary alternate + * Flag indicating if shifted is requested for Quaternary alternate * handling. If this is not true, the default for alternate handling will * be non-ignorable. 
*/ @@ -1591,9 +1621,6 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * Extra case level for sorting */ private boolean m_isCaseLevel_; - - private static final int CE_TAG_SHIFT_ = 24; - private static final int CE_TAG_MASK_ = 0x0F000000; private static final int SORT_BUFFER_INIT_SIZE_ = 128; private static final int SORT_BUFFER_INIT_SIZE_1_ = @@ -1621,19 +1648,8 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * CE buffer size */ private static final int CE_BUFFER_SIZE_ = 512; - - // private methods ------------------------------------------------------- - /** - * Checks if the argument ce is a continuation - * @param ce collation element to test - * @return true if ce is a continuation - */ - private static final boolean isContinuation(int ce) - { - return ce != CollationElementIterator.NULLORDER - && (ce & CE_CONTINUATION_TAG_) == CE_CONTINUATION_TAG_; - } + // private methods ------------------------------------------------------- /** * Gets the 2 bytes of primary order and adds it to the primary byte array @@ -1645,8 +1661,8 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * a continuation ce * @param doShift flag indicating if ce is to be shifted * @param leadPrimary lead primary used for compression - * @param commonBottom4 common byte value for quartenary - * @param bottomCount4 smallest byte value for quartenary + * @param commonBottom4 common byte value for Quaternary + * @param bottomCount4 smallest byte value for Quaternary * @return the new lead primary for compression */ private final int doPrimaryBytes(int ce, byte bytes[][], int bytescount[], @@ -1656,7 +1672,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate { int p2 = (ce >>= 16) & LAST_BYTE_MASK_; // in ints for unsigned - int p1 = (ce >> 8) & LAST_BYTE_MASK_; // comparison + int p1 = ce >>> 8; // comparison if (doShift) { if (count[4] > 0) { while (count[4] > 
bottomCount4) { @@ -1931,17 +1947,17 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate } /** - * Gets the quartenary byte and adds it to the quartenary byte array + * Gets the Quaternary byte and adds it to the Quaternary byte array * @param bytes array of byte arrays for each strength * @param bytescount array of the size of each strength byte arrays * @param count array of counters for each of the strength * @param isCodePointHiragana flag indicator if the previous codepoint * we dealt with was Hiragana - * @param commonBottom4 smallest common quartenary byte - * @param bottomCount4 smallest quartenary byte - * @param hiragana4 hiragana quartenary byte + * @param commonBottom4 smallest common Quaternary byte + * @param bottomCount4 smallest Quaternary byte + * @param hiragana4 hiragana Quaternary byte */ - private final void doQuartenaryBytes(byte bytes[][], int bytescount[], + private final void doQuaternaryBytes(byte bytes[][], int bytescount[], int count[], boolean isCodePointHiragana, int commonBottom4, int bottomCount4, @@ -1985,9 +2001,10 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate int count[], boolean doFrench, byte hiragana4, int commonBottom4, int bottomCount4) + { - int backupDecomposition = m_decomposition_; - m_decomposition_ = NO_DECOMPOSITION; // have to revert to backup later + int backupDecomposition = getDecomposition(); + setDecomposition(NO_DECOMPOSITION); // have to revert to backup later CollationElementIterator coleiter = new CollationElementIterator(source, this); @@ -2053,12 +2070,12 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate } if (compare[4] && notIsContinuation) { // compare quad - doQuartenaryBytes(bytes, bytescount, count, + doQuaternaryBytes(bytes, bytescount, count, coleiter.m_isCodePointHiragana_, commonBottom4, bottomCount4, hiragana4); } } - m_decomposition_ = backupDecomposition; // reverts to original + 
setDecomposition(backupDecomposition); // reverts to original if (frenchOffset[0] != -1) { // one last round of checks reverseBuffer(bytes[2], frenchOffset); @@ -2117,14 +2134,16 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate private final void doFrench(byte bytes[][], int bytescount[], int count[]) { for (int i = 0; i < bytescount[2]; i ++) { - byte s = bytes[2][bytescount[2] - i - 1]; + byte s = bytes[2][bytescount[2] - i - 1]; // This is compression code. if (s == COMMON_2_) { ++ count[2]; } else { if (count[2] > 0) { - if (s > COMMON_2_) { // not necessary for 4th level. + // getting the unsigned value + if ((s & LAST_BYTE_MASK_) > COMMON_2_) { + // not necessary for 4th level. while (count[2] > TOP_COUNT_2_) { append(bytes, bytescount, 1, (byte)(COMMON_TOP_2_ - TOP_COUNT_2_)); @@ -2336,21 +2355,38 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate private final int getFirstUnmatchedOffset(String source, String target) { int result = 0; - int minlength = source.length(); - if (minlength > target.length()) { - minlength = target.length(); + int slength = source.length(); + int tlength = target.length(); + int minlength = slength; + if (minlength > tlength) { + minlength = tlength; } while (result < minlength && source.charAt(result) == target.charAt(result)) { result ++; } - if (result > 0 && result < minlength) { + if (result > 0) { // There is an identical portion at the beginning of the two // strings. If the identical portion ends within a contraction or a // combining character sequence, back up to the start of that - // sequence. - char schar = source.charAt(result); // first differing chars - char tchar = target.charAt(result); + // sequence. 
+ char schar = 0; + char tchar = 0; + if (result < minlength) { + schar = source.charAt(result); // first differing chars + tchar = target.charAt(result); + } + else { + if (slength == tlength) { + return result; + } + else if (slength < tlength) { + tchar = target.charAt(result); + } + else { + schar = source.charAt(result); + } + } if (isUnsafe(schar) || isUnsafe(tchar)) { // We are stopped in the middle of a contraction or combining @@ -2394,9 +2430,10 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * to compare. It is used when compare gets in trouble and needs to bail * out. * @param source text string - * @param target text string + * @param target text string */ private final int compareBySortKeys(String source, String target) + { CollationKey sourcekey = getCollationKey(source); CollationKey targetkey = getCollationKey(target); @@ -2432,6 +2469,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate String source, String target, int textoffset, int cebuffer[][], int cebuffersize[]) + { // Preparing the context objects for iterating over strings StringCharacterIterator siter = new StringCharacterIterator(source, @@ -2574,6 +2612,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate CollationElementIterator coleiter, int lowestpvalue, int cebuffer[][], int cebuffersize[], int cebufferindex) + { boolean shifted = false; int result = CollationElementIterator.IGNORABLE; @@ -2966,21 +3005,18 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate */ private static final int doIdenticalCompare(String source, String target, int offset, boolean normalize) + { if (normalize) { - /* - if (unorm_quickCheck(sColl->string, sLen, UNORM_NFD) != UNORM_YES) { - source = unorm_decompose(sColl->writableBuffer, - sColl->writableBufSize, - sBuf, sLen, FALSE, FALSE); + if (Normalizer.quickCheck(source, Normalizer.NFD) + != Normalizer.YES) { + source = 
Normalizer.decompose(source, false); } - if (unorm_quickCheck(tColl->string, tLen, UNORM_NFD) != UNORM_YES) { - target = unorm_decompose(tColl->writableBuffer, - tColl->writableBufSize, - tBuf, tLen, FALSE, FALSE); + if (Normalizer.quickCheck(target, Normalizer.NFD) + != Normalizer.YES) { + target = Normalizer.decompose(target, false); } - */ offset = 0; } @@ -3003,15 +3039,25 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate // compare identical prefixes - they do not need to be fixed up char schar = 0; char tchar = 0; - while (true) { + int slength = source.length(); + int tlength = target.length(); + int minlength = Math.min(slength, tlength); + while (offset < minlength) { schar = source.charAt(offset); tchar = target.charAt(offset ++); if (schar != tchar) { break; } - if (schar == 0) { - return 0; - } + } + + if (schar == tchar && offset == minlength) { + if (slength > minlength) { + return 1; + } + if (tlength > minlength) { + return -1; + } + return 0; } // if both values are in or above the surrogate range, Fix them up. @@ -3046,6 +3092,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * @return true if source after offset is ignorable. false otherwise */ private final boolean checkIgnorable(String source, int offset) + { StringCharacterIterator siter = new StringCharacterIterator(source, offset, source.length(), offset); @@ -3060,4 +3107,83 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate } return true; } + + /** + * Resets the internal case data members and compression values. 
+ */ + private void updateInternalState() + { + if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) { + m_caseSwitch_ = (byte)CASE_SWITCH_; + } + else { + m_caseSwitch_ = NO_CASE_SWITCH_; + } + + if (m_isCaseLevel_ || m_caseFirst_ == AttributeValue.OFF_) { + m_mask3_ = CE_REMOVE_CASE_; + m_common3_ = COMMON_NORMAL_3_; + m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_OFF_; + m_top3_ = COMMON_TOP_CASE_SWITCH_OFF_3_; + m_bottom3_ = COMMON_BOTTOM_3_; + } + else { + m_mask3_ = (byte)CE_KEEP_CASE_; + m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_ON_; + if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) { + m_common3_ = COMMON_UPPER_FIRST_3_; + m_top3_ = COMMON_TOP_CASE_SWITCH_UPPER_3_; + m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_UPPER_3_; + } else { + m_common3_ = COMMON_NORMAL_3_; + m_top3_ = COMMON_TOP_CASE_SWITCH_LOWER_3_; + m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_LOWER_3_; + } + } + + // Set the compression values + int total3 = m_top3_ - COMMON_BOTTOM_3_ - 1; + // we multilply double with int, but need only int + m_topCount3_ = (int)(PROPORTION_3_ * total3); + m_bottomCount3_ = total3 - m_topCount3_; + + if (!m_isCaseLevel_ && getStrength() == AttributeValue.TERTIARY_ + && !m_isFrenchCollation_ && !m_isAlternateHandlingShifted_) { + m_isSimple3_ = true; + } + else { + m_isSimple3_ = false; + } + } + + /** + * Initializes the RuleBasedCollator + */ + private final void init() + { + for (m_minUnsafe_ = 0; m_minUnsafe_ < DEFAULT_MIN_HEURISTIC_; + m_minUnsafe_ ++) { + // Find the smallest unsafe char. + if (isUnsafe(m_minUnsafe_)) { + break; + } + } + + for (m_minContractionEnd_ = 0; + m_minContractionEnd_ < DEFAULT_MIN_HEURISTIC_; + m_minContractionEnd_ ++) { + // Find the smallest contraction-ending char. 
+ if (isContractionEnd(m_minContractionEnd_)) { + break; + } + } + setStrength(m_defaultStrength_); + setDecomposition(m_defaultDecomposition_); + m_isFrenchCollation_ = m_defaultIsFrenchCollation_; + m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_; + m_isCaseLevel_ = m_defaultIsCaseLevel_; + m_caseFirst_ = m_defaultCaseFirst_; + m_isHiragana4_ = m_defaultIsHiragana4_; + updateInternalState(); + } } diff --git a/icu4j/src/com/ibm/icu/text/SearchIterator.java b/icu4j/src/com/ibm/icu/text/SearchIterator.java index befa1290554..2f8dce21da3 100755 --- a/icu4j/src/com/ibm/icu/text/SearchIterator.java +++ b/icu4j/src/com/ibm/icu/text/SearchIterator.java @@ -5,423 +5,715 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/SearchIterator.java,v $ - * $Date: 2002/04/03 19:13:56 $ - * $Revision: 1.6 $ + * $Date: 2002/06/21 23:56:48 $ + * $Revision: 1.7 $ * ***************************************************************************************** */ package com.ibm.icu.text; -import java.text.BreakIterator; import java.text.CharacterIterator; /** - *SearchIterator
is an abstract base class that provides methods - * to search for a pattern within a text string. Instances of - *SearchIterator
maintain a current position and scan over - * the target text, returning the indices the pattern is matched - * and the length of each match. *- *
*SearchIterator
is an abstract base class that defines a - * protocol for text searching. Subclasses provide concrete implementations of - * various search algorithms. For example, {@link StringSearch} - * implements language-sensitive pattern matching based on the comparison rules - * defined in a {@link java.text.RuleBasedCollator RuleBasedCollator} object. + * SearchIterator is an abstract base class that defines a protocol for text + * searching. Subclasses provide concrete implementations of various search + * algorithms. The concrete subclass, StringSearch, is provided and implements + * language-sensitive pattern matching based on the comparison rules defined in + * a RuleBasedCollator object. Instances of SearchIterator maintain a current + * position and scan over the target text, returning the indices where a + * matched is found and the length of each match. Generally, the sequence of + * forward matches will be equivalent to the sequence of backward matches. + *- * Internally,
*SearchIterator
scans text using a - * {@link CharacterIterator}, and is thus able to scan text held - * by any object implementing that protocol. AStringCharacterIterator
- * is used to scanString
objects passed tosetText
. + * Internally, SearchIterator scans text using a CharacterIterator, and is thus + * able to scan text held by any object implementing that protocol. + *- *
SearchIterator
provides an API that is similar to that of - * other text iteration classes such asBreakIterator
. Using this - * class, it is easy to scan through text looking for all occurances of a - * given pattern. The following example uses aStringSearch
object to - * find all instances of "fox" in the target string. Any other subclass of - *SearchIterator
can be used in an identical manner. - *- * - * @see StringSearch + * + * + * @author Laura Werner, synwee + * @since release 1.0 + * @draft release 2.2 + * @see BreakIterator */ -public abstract class SearchIterator { +public abstract class SearchIterator +{ + + // public data members ------------------------------------------------- + /** - * DONE is returned by previous() and next() after all valid - * matches have been returned, and by first() and last() if - * there are no matches at all. + * DONE is returned by previous() and next() after all valid matches have + * been returned, and by first() and last() if there are no matches at all. + * @see #previous + * @see #next */ public static final int DONE = -1; - /** - * Private value indicating that the iterator is pointing - * before the beginning of the target text. - */ - private static final int BEFORE = -2; - - /** - * Return the first index at which the target text matches the search - * pattern. The iterator is adjusted so that its current index - * (as returned by {@link #getIndex}) is the match posisition if one was found - * and+ * If logical matches are required, BreakIterators can be used to define the + * boundaries of a logical match. For instance the pattern "e" will + * not be found in the string "\u00e9" if a CharacterBreakIterator is used. + * By default, the SearchIterator does not impose any logic matches, it will + * return any result that matches the pattern. Illustrating with the above + * example, "e" will be found in the string "\u00e9" if no BreakIterator is + * specified. + * + *
+ * SearchIterator also provides means to handle overlapping matches via the + * API setOverlapping(boolean). For example, if the overlapping mode is set, + * searching for the pattern "abab" in the text "ababab" will yield the results + * 0 and 2, where else if overlapping is not set, SearchIterator will only + * produce the result of 0. By default the overlapping mode is not set. + *
+ *+ * The APIs in SearchIterator is similar to that of other text iteration + * classes such as the BreakIterator. Using this class, it is easy to + * scan through text looking for all occurances of a match. The + * following example uses a StringSearch object to find all instances of + * "fox" in the target string. Any other subclass of SearchIterator can be + * used in an identical manner. + *
+ *+ * Example of use:
+ ** String target = "The quick brown fox jumped over the lazy fox"; * String pattern = "fox"; - * * SearchIterator iter = new StringSearch(pattern, target); - * - * for (int pos = iter.first(); pos != SearchIterator.DONE; pos = iter.next()) { - * System.out.println("Found match at " + pos + - * ", length is " + iter.getMatchLength()); + * for (int pos = iter.first(); pos != SearchIterator.DONE; + * pos = iter.next()) { + * System.out.println("Found match at " + pos + ", length is " + * + iter.getMatchLength()); * } - *DONE
if one was not. - * - * @return The character index of the first match, orDONE
if there - * are no matches. - */ - final public int first() { - setIndex(BEFORE); - return next(); - } - - /** - * Return the first index greater than pos at which the target - * text matches the search pattern. The iterator is adjusted so that its current index - * (as returned by {@link #getIndex}) is the match posisition if one was found - * andDONE
if one was not. - * - * @return The character index of the first match followingpos
, - * or DONE if there are no matches. - */ - final public int following(int pos) { - setIndex(pos); - return next(); - } + // public methods ----------------------------------------------------- + + // public setters ----------------------------------------------------- /** - * Return the last index in the target text at which it matches - * the search pattern and adjusts the iteration to point to that position. - * - * @return The index of the first match, or DONE if there - * are no matches. - */ - final public int last() { - setIndex(DONE); - return previous(); - } - - /** - * Return the first index less thanpos
at which the target - * text matches the search pattern. The iterator is adjusted so that its current index - * (as returned by {@link #getIndex}) is the match posisition if one was found - * and DONE if one was not. - * - * @return The character index of the first match precedingpos
, - * orDONE
if there are no matches. - */ - final public int preceding(int pos) { - setIndex(pos); - return previous(); - } - - /** - * Return the index of the next point at which the text matches the - * search pattern, starting from the current position. - * @return The index of the next match after the current position, - * orDONE
if there are no more matches. - * - * @see #first - */ - public int next() { - if (index == BEFORE){ - // Starting at the beginning of the text - index = target.getBeginIndex(); - } else if (length > 0) { - // Finding the next match after a previous one - index += overlap ? 1 : length; - } - index -= 1; - - do { - length = 0; - index = handleNext(index + 1); - } while (index != DONE && !isBreakUnit(index, index+length)); - - return index; - } - - /** - * Return the index of the previous point at which the text matches - * the search pattern, starting at the current position - * - * @return The index of the previous match before the current position, - * orDONE
if there are no more matches. - */ - public int previous() { - if (index == DONE) { - index = target.getEndIndex(); - } else if (length > 0) { - // Finding the previous match before a following one - index = overlap ? index + length - 1 : index; - } - index += 1; - - do { - length = 0; - index = handlePrev(index - 1); - } while (index != DONE && !isBreakUnit(index, index+length)); - - if (index == DONE) { - index = BEFORE; - } - return getIndex(); - } - - - - /** - * Return the current index in the text being searched. - * If the iteration has gone past the end of the text - * (or past the beginning for a backwards search), - * {@link #DONE} is returned. - */ - public int getIndex() { - return index == BEFORE ? DONE : index; - } - - /** - * Determines whether overlapping matches are returned. If this - * property istrue
, matches that begin within the - * boundry of the previous match are considered valid and will - * be returned. For example, when searching for "abab" in the - * target text "ababab", both offsets 0 and 2 will be returned - * as valid matches if this property istrue
. *- * The default setting of this property is true + * Sets the position in the target text which the next search will start + * from to the argument. This method clears all previous states. + *
+ * @param position index to start next search from. + * @exception IndexOutOfBoundsException thrown if argument position is out + * of the target text range. + * @see #getIndex + * @draft release 2.2 */ - public void setOverlapping(boolean allowOverlap) { - overlap = allowOverlap; - } - - /** - * Determines whether overlapping matches are returned. - * - * @see #setOverlapping - */ - public boolean isOverlapping() { - return overlap; - } - - /** - * Returns the length of text in the target which matches the search - * pattern. This call returns a valid result only after a successful - * call to {@link #first}, {@link #next}, {@link #previous}, or {@link #last}. - * Just after construction, or after a searching method returns - * DONE, this method will return 0. - * - * @return The length of the match in the target text, or 0 if there - * is no match currently. - */ - public int getMatchLength() { - return length; - } - - /** - * Set the BreakIterator that will be used to restrict the points - * at which matches are detected. - * - * @param breaker A {@link java.text.BreakIterator BreakIterator} - * that will be used to restrict the points - * at which matches are detected. If a match is found, but the match's start - * or end index is not a boundary as determined by - * the BreakIterator, the match will be rejected and - * another will be searched for. - * - * If this parameter is null, no break - * detection is attempted. 
- * - * @see #getBreakIterator - */ - public void setBreakIterator(BreakIterator iterator) { - breaker = iterator; - if (breaker != null) { - breaker.setText(target); + public void setIndex(int position) { + if (position < targetText.getBeginIndex() + || position > targetText.getEndIndex()) { + throw new IndexOutOfBoundsException( + "setIndex(int) expected position to be between " + + targetText.getBeginIndex() + " and " + targetText.getEndIndex()); } + m_setOffset_ = position; + m_reset_ = false; + matchLength = 0; } - - /** - * Returns the BreakIterator that is used to restrict the points - * at which matches are detected. This will be the same object - * that was passed to the constructor or tosetBreakIterator
. - * Note that null is a legal value; it means that break + + /** + *+ * Determines whether overlapping matches are returned. See the class + * documentation for more information about overlapping matches. + *
+ *+ * The default setting of this property is false + *
+ * @param allowOverlap flag indicator if overlapping matches are allowed + * @see #isOverlapping + * @draft release 2.2 + */ + public void setOverlapping(boolean allowOverlap) + { + m_isOverlap_ = allowOverlap; + } + + /** + * Set the BreakIterator that is used to restrict the points at which + * matches are detected. + * Using null as the parameter is legal; it means that break * detection should not be attempted. - * - * @see #setBreakIterator + * See class documentation for more information. + * @param breakiter A BreakIterator that will be used to restrict the + * points at which matches are detected. + * @see #getBreakIterator + * @see BreakIterator */ - public BreakIterator getBreakIterator() { - return breaker; - } - - /** - * Set the target text which should be searched and resets the - * iterator's position to point before the start of the target text. - * This method is useful if you want to re-use an iterator to - * search for the same pattern within a different body of text. - * - * @see #getTarget - */ - public void setTarget(CharacterIterator iterator) { - target = iterator; - if (breaker != null) { - breaker.setText(target); + public void setBreakIterator(BreakIterator breakiter) + { + breakIterator = breakiter; + if (breakIterator != null) { + breakIterator.setText(targetText); } - setIndex(BEFORE); } /** - * Return the target text which is being searched - * + * Set the target text to be searched. Text iteration will hence begin at + * the start of the text string. This method is useful if you want to + * re-use an iterator to search within a different body of text. 
+ * @param text new text iterator to look for match, + * @exception IllegalArgumentException thrown when text is null or has + * 0 length + * @see #getTarget + * @draft ICU 2.0 + */ + public void setTarget(CharacterIterator text) + { + if (text == null || text.getEndIndex() == text.getIndex()) { + throw new IllegalArgumentException("Illegal null or empty text"); + } + + targetText = text; + targetText.setIndex(targetText.getBeginIndex()); + matchLength = 0; + m_reset_ = true; + if (breakIterator != null) { + breakIterator.setText(targetText); + } + } + + // public getters ---------------------------------------------------- + + /** + *+ * Returns the index to the most recent match in the target text that was + * searched. + * This call returns a valid result only after a successful call to + * {@link #first}, {@link #next}, {@link #previous}, or {@link #last}. + * Just after construction, or after a searching method returns + * DONE, this method will return DONE. + *
+ *+ * Use getMatchLength to get the matched text length. + * getMatchedText will return the subtext in the searched + * target text from index getMatchStart() with length getMatchLength(). + *
+ * @return index to a substring within the text string that is being + * searched. + * @see #getMatchLength + * @see #getMatchedText + * @see #first + * @see #next + * @see #previous + * @see #last + * @see #DONE + * @draft release 2.2 + */ + public int getMatchStart() + { + return targetText.getIndex(); + } + + /** + * Return the index in the target text where the iterator is currently + * positioned at. + * If the iteration has gone past the end of the target text or past + * the beginning for a backwards search, {@link #DONE} is returned. + * @return index in the target text where the iterator is currently + * positioned at. + * @draft release 2.2 + * @see #first + * @see #next + * @see #previous + * @see #last + * @see #DONE + */ + public abstract int getIndex(); + + /** + *+ * Returns the subtext length of the most recent match in the target text. + * This call returns a valid result only after a successful + * call to {@link #first}, {@link #next}, {@link #previous}, or + * {@link #last}. + * Just after construction, or after a searching method returns + * DONE, this method will return 0. See getMatchStart() for + * more details. + *
+ * @return The length of the most recent match in the target text, or 0 if + * there is no match. + * @see #getMatchStart + * @see #getMatchedText + * @see #first + * @see #next + * @see #previous + * @see #last + * @see #DONE + */ + public int getMatchLength() + { + return matchLength; + } + + /** + * Returns the BreakIterator that is used to restrict the indexes at which + * matches are detected. This will be the same object that was passed to + * the constructor or tosetBreakIterator
. + * If the BreakIterator has not been set, null will be returned. + * See setBreakIterator for more information. + * @return the BreakIterator set to restrict logic matches + * @see #setBreakIterator + * @see BreakIterator + */ + public BreakIterator getBreakIterator() + { + return breakIterator; + } + + /** + * Return the target text which is being searched. + * @return target text being searched. * @see #setTarget */ - public CharacterIterator getTarget() { - return target; + public CharacterIterator getTarget() + { + return targetText; } /** * Returns the text that was matched by the most recent call to - * {@link #first}, {@link #next}, {@link #previous}, or {@link #last}. - * If the iterator is not pointing at a valid match (e.g. just after - * construction or after DONE has been returned, returns - * an empty string. + * {@link #first}, {@link #next}, {@link #previous}, or {@link #last}. + * If the iterator is not pointing at a valid match, for instance just + * after construction or after DONE has been returned, an empty + * String will be returned. 
See getMatchStart for more information + * @see #getMatchStart + * @see #getMatchLength + * @see #first + * @see #next + * @see #previous + * @see #last + * @see #DONE + * @return the subtext in target text of the most recent match */ - public String getMatchedText() { - StringBuffer buffer = new StringBuffer(); - - if (length > 0) { - int i = 0; - for (char c = target.setIndex(index); i < length; c = target.next(), i++) - { - buffer.append(c); - } - } - return buffer.toString(); + public String getMatchedText() + { + if (matchLength > 0) { + int start = targetText.getIndex(); + int limit = start + matchLength; + StringBuffer result = new StringBuffer(matchLength); + result.append(targetText.current()); + targetText.next(); + while (targetText.getIndex() < limit) { + result.append(targetText.current()); + targetText.next(); + } + targetText.setIndex(start); + return result.toString(); + } + return null; } - //------------------------------------------------------------------- - // Protected interface for subclasses - //------------------------------------------------------------------- + // miscellaneous public methods ----------------------------------------- + + /** + * Returns the index of the next forwards valid match in the target + * text, + * starting the search from the current iterator position. The iterator is + * adjusted so that its current index, as returned by {@link #getIndex}, + * is the starting position of the match if one was found. If a match is + * not found, DONE will be returned. + * @return The starting index of the next forward match after the current + * iterator position, or + * DONE if there are no more matches. 
+ * @see #getMatchStart + * @see #getMatchLength + * @see #getMatchedText + * @see #following + * @see #preceding + * @see #previous + * @see #first + * @see #last + * @see #DONE + */ + public int next() + { + int start = targetText.getIndex(); + if (m_setOffset_ != DONE) { + start = m_setOffset_; + m_setOffset_ = DONE; + } + if (m_isForwardSearching_) { + if (!m_reset_ && + start + matchLength >= targetText.getEndIndex()) { + // not enough characters to match + matchLength = 0; + targetText.setIndex(targetText.getEndIndex()); + return DONE; + } + m_reset_ = false; + } + else { + // switching direction. + // if matchedIndex == USEARCH_DONE, it means that either a + // setIndex has been called or that previous ran off the text + // string. the iterator would have been set to offset 0 if a + // match is not found. + m_isForwardSearching_ = true; + if (start != DONE) { + // there's no need to set the collation element iterator + // the next call to next will set the offset. + return start; + } + } + + if (start == DONE) { + start = targetText.getBeginIndex(); + } + return handleNext(start); + } /** - * Constructor for use by subclasses. - *- * @param target The target text to be searched. This is for internal - * use by this class. Subclasses need to maintain their - * own reference to or iterator over the target text - * for use by their {@link #handleNext handleNext} and - * {@link #handlePrev handlePrev} methods. - * - * @param breaker A {@link BreakIterator} that is used to restrict the points - * at which matches are detected. If handleNext or - * handlePrev finds a match, but the match's start - * or end index is not a boundary as determined by - * the BreakIterator, the match is rejected and - * handleNext or handlePrev is called again. - * If this parameter is null, no break - * detection is attempted. - * + * Returns the index of the next backwards valid match in the target + * text, + * starting the search from the current iterator position. 
The iterator is + * adjusted so that its current index, as returned by {@link #getIndex}, + * is the starting position of the match if one was found. If a match is + * not found, DONE will be returned. + * @return The starting index of the next backwards match after the current + * iterator position, or + * DONE if there are no more matches. + * @see #getMatchStart + * @see #getMatchLength + * @see #getMatchedText + * @see #following + * @see #preceding + * @see #next + * @see #first + * @see #last + * @see #DONE + */ + public int previous() + { + int start = targetText.getIndex(); + if (m_setOffset_ != DONE) { + start = m_setOffset_; + m_setOffset_ = DONE; + } + if (m_reset_) { + m_isForwardSearching_ = false; + m_reset_ = false; + start = targetText.getEndIndex();; + } + + if (m_isForwardSearching_ == true) { + // switching direction. + // if matchedIndex == USEARCH_DONE, it means that either a + // setIndex has been called or that next ran off the text + // string. the iterator would have been set to offset textLength if + // a match is not found. + m_isForwardSearching_ = false; + if (start != DONE) { + return start; + } + start = targetText.getEndIndex(); + } + else { + if (start == DONE) { + return DONE; + } + if (start == targetText.getBeginIndex()) { + // not enough characters to match + matchLength = 0; + targetText.setIndex(targetText.getBeginIndex()); + return DONE; + } + } + + return handlePrevious(start); + } + + /** + * Checks if the overlapping property has been set. + * See setOverlapping(boolean) for more information. + * @see #setOverlapping + * @return true if the overlapping property has been set, false otherwise + * @draft release 2.2 + */ + public boolean isOverlapping() + { + return m_isOverlap_; + } + + /** + *
+ * Resets the search iteration. All properties will be reset to the + * default value. + *
+ *+ * Search will begin at the start of the target text if a forward iteration + * is initiated before a backwards iteration. Otherwise if a + * backwards iteration is initiated before a forwards iteration, the search + * will begin at the end of the target text. + *
+ * @draft release 2.2 + */ + public void reset() + { + // reset is setting the attributes that are already in string search + matchLength = 0; + setIndex(targetText.getBeginIndex()); + m_isOverlap_ = false; + m_isForwardSearching_ = true; + m_reset_ = true; + m_setOffset_ = DONE; + } + + /** + * Return the index of the first forward match in the target text. + * This method effectively sets the iteration to begin at the start of the + * target text and searches forwards from there. + * The iterator is + * adjusted so that its current index, as returned by {@link #getIndex}, + * is the starting position of the match if one was found. If a match is + * not found, DONE will be returned. + * @return The index of the first forward match, orDONE
+ * if there are no matches. + * @see #getMatchStart + * @see #getMatchLength + * @see #getMatchedText + * @see #following + * @see #preceding + * @see #next + * @see #previous + * @see #last + * @see #DONE + */ + public final int first() + { + m_isForwardSearching_ = true; + setIndex(targetText.getBeginIndex()); + return next(); + } + + /** + * Return the index of the first forward match in target text that + * is greater than argument position. + * This method effectively sets the iteration to begin at the argument + * position index of the target text and searches forwards from there. + * The iterator is + * adjusted so that its current index, as returned by {@link #getIndex}, + * is the starting position of the match if one was found. If a match is + * not found, DONE will be returned. + * @return The index of the first forward match, orDONE
+ * if there are no matches. + * @see #getMatchStart + * @see #getMatchLength + * @see #getMatchedText + * @see #first + * @see #preceding + * @see #next + * @see #previous + * @see #last + * @see #DONE + */ + public final int following(int position) + { + m_isForwardSearching_ = true; + // position checked in usearch_setOffset + setIndex(position); + return next(); + } + + /** + * Return the index of the last forward match in target text. + * This method effectively sets the iteration to begin at the end of the + * target text and searches backwards from there. + * The iterator is + * adjusted so that its current index, as returned by {@link #getIndex}, + * is the starting position of the match if one was found. If a match is + * not found, DONE will be returned. + * @return The starting index of the last forward match, or + *DONE
if there are no matches. + * @see #getMatchStart + * @see #getMatchLength + * @see #getMatchedText + * @see #first + * @see #preceding + * @see #next + * @see #previous + * @see #following + * @see #DONE + */ + public final int last() + { + m_isForwardSearching_ = false; + setIndex(targetText.getEndIndex()); + return previous(); + } + + /** + * Return the index of the first backwards match in target + * text that is less than argument position. + * This method effectively sets the iteration to begin at the argument + * position index of the target text and searches backwards from there. + * The iterator is + * adjusted so that its current index, as returned by {@link #getIndex}, + * is the starting position of the match if one was found. If a match is + * not found, DONE will be returned. + * @return The starting index of the first backwards match, or + *DONE
+ * if there are no matches. + * @see #getMatchStart + * @see #getMatchLength + * @see #getMatchedText + * @see #first + * @see #following + * @see #next + * @see #previous + * @see #last + * @see #DONE + */ + public final int preceding(int position) + { + m_isForwardSearching_ = false; + // position checked in usearch_setOffset + setIndex(position); + return previous(); + } + + // protected data member ---------------------------------------------- + + /** + * The BreakIterator to define the boundaries of a logical match. + * This value can be a null. + * See class documentation for more information. + * @see #setBreakIterator(BreakIterator) + * @see #getBreakIterator + * @see BreakIterator + */ + protected BreakIterator breakIterator; + /** + * Target text for searching. + * @see #setTarget(CharacterIterator) + * @see #getTarget + */ + protected CharacterIterator targetText; + /** + * Length of the most current match in target text. + * Value 0 is the default value. + * @see #setMatchLength + * @see #getMatchLength + */ + protected int matchLength; + + // protected constructor ---------------------------------------------- + + /** + * Protected constructor for use by subclasses. + * Initializes the iterator with the argument target text for searching + * and sets the BreakIterator. + * See class documentation for more details on the use of the target text + * and BreakIterator. + * @param target The target text to be searched. + * @param breaker A {@link BreakIterator} that is used to determine the + * boundaries of a logical match. This argument can be null. 
+ * @exception IllegalArgumentException thrown when argument target is null, + * or of length 0 + * @see BreakIterator */ protected SearchIterator(CharacterIterator target, BreakIterator breaker) { - this.target = target; - - if (breaker != null) { - this.breaker = (BreakIterator)breaker.clone(); - this.breaker.setText(target); + if (target == null + || (target.getEndIndex() - target.getBeginIndex()) == 0) { + throw new IllegalArgumentException( + "Illegal argument target. " + + " Argument can not be null or of length 0"); } - - index = target.getBeginIndex(); - length = 0; - } - - /** - * Abstract method which subclasses override to provide the mechanism - * for finding the next match in the target text. This allows different - * subclasses to provide different search algorithms. - *- * If a match is found, the implementation should return the index at - * which the match starts and should call {@link #setMatchLength setMatchLength} - * with the number of characters in the target - * text that make up the match. If no match is found, the method - * should return DONE and should not call setMatchLength. - *
- * @param startAt The index in the target text at which the search - * should start. - * - * @see #setMatchLength - */ - protected abstract int handleNext(int startAt); - - /** - * Abstract method which subclasses override to provide the mechanism - * for finding the previous match in the target text. This allows different - * subclasses to provide different search algorithms. - *
- * If a match is found, the implementation should return the index at - * which the match starts and should call {@link #setMatchLength setMatchLength} - * with the number of characters in the target - * text that make up the match. If no match is found, the method - * should return DONE and should not call setMatchLength. - *
- * @param startAt The index in the target text at which the search - * should start. - * - * @see #setMatchLength - */ - protected abstract int handlePrev(int startAt); - - /** - * Sets the length of the currently matched string in the target text. - * Subclasses'
handleNext
andhandlePrev
- * methods should call this when they find a match in the target text. - */ - protected void setMatchLength(int length) { - this.length = length; - } - - //------------------------------------------------------------------- - // Privates - // + targetText = target; + breakIterator = breaker; + if (breakIterator != null) { + breakIterator.setText(target); + } + matchLength = 0; + m_isOverlap_ = false; + m_isForwardSearching_ = true; + m_reset_ = true; + m_setOffset_ = DONE; + } + // protected methods -------------------------------------------------- + + /** - * Internal method used by preceding and following. Sets the index - * to point to the given position, and clears any state that's - * affected. - */ - private void setIndex(int pos) { - index = pos; - length = 0; - } - - /** - * Determine whether the target text bounded bystart
and - *end
is one or more whole units of text as determined by - * the currentBreakIterator
. - */ - private boolean isBreakUnit(int start, int end) + * Sets the length of the most recent match in the target text. + * Subclasses' handleNext() and handlePrevious() methods should call this + * after they find a match in the target text. + * @param length new length to set + * @see #handleNext + * @see #handlePrevious + */ + protected void setMatchLength(int length) { - if (breaker == null) { - return true; - } - boolean startBound = breaker.isBoundary(start); - boolean endBound = (end == target.getEndIndex()) || breaker.isBoundary(end); - - return startBound && endBound; + matchLength = length; } + + /** + *+ * Abstract method which subclasses override to provide the mechanism + * for finding the next forwards match in the target text. This + * allows different subclasses to provide different search algorithms. + *
+ *+ * If a match is found, setMatchLength(int) would have to be called to + * set the length of the result match. + * The iterator is adjusted so that its current index, as returned by + * {@link #getIndex}, is the starting position of the match if one was + * found. If a match is not found, DONE will be returned. + *
+ * @param start index in the target text at which the forwards search + * should begin. + * @return the starting index of the next forwards match if found, DONE + * otherwise + * @see #setMatchLength(int) + * @see #handlePrevious(int) + * @see #DONE + */ + protected abstract int handleNext(int start); - //------------------------------------------------------------------------- - // Private data... - //------------------------------------------------------------------------- - private int index; // Current position in the target text - private int length; // Length of matched text, or 0 - private boolean overlap = true; // Return overlapping matches? - private CharacterIterator target; // Target text to be searched - private BreakIterator breaker; // Break iterator to constrain matches -}; + /** + *+ * Abstract method which subclasses override to provide the mechanism + * for finding the next backwards match in the target text. + * This allows different + * subclasses to provide different search algorithms. + *
+ *+ * If a match is found, setMatchLength(int) would have to be called to + * set the length of the result match. + * The iterator is adjusted so that its current index, as returned by + * {@link #getIndex}, is the starting position of the match if one was + * found. If a match is not found, DONE will be returned. + *
+ * @param start index in the target text at which the backwards search + * should begin. + * @return the starting index of the next backwards match if found, + * DONE otherwise + * @see #setMatchLength(int) + * @see #handleNext(int) + * @see #DONE + */ + protected abstract int handlePrevious(int startAt); + + // private data members ------------------------------------------------ + + /** + * Flag indicates if we are doing a forwards search + */ + private boolean m_isForwardSearching_; + /** + * Flag to indicate if overlapping search is to be done. + * E.g. looking for "aa" in "aaa" will yield matches at offset 0 and 1. + */ + private boolean m_isOverlap_; + /** + * Flag indicates if we are at the start of a string search. + * This indicates that we are in forward search and at the start of m_text. + */ + private boolean m_reset_; + /** + * Data member to store user defined position in setIndex(). + * If setIndex() is not called, this value will be DONE. + */ + private int m_setOffset_; +} diff --git a/icu4j/src/com/ibm/icu/text/StringSearch.java b/icu4j/src/com/ibm/icu/text/StringSearch.java index 44dfa5a3747..e33989c1621 100755 --- a/icu4j/src/com/ibm/icu/text/StringSearch.java +++ b/icu4j/src/com/ibm/icu/text/StringSearch.java @@ -5,642 +5,3081 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/StringSearch.java,v $ - * $Date: 2002/03/20 05:11:16 $ - * $Revision: 1.6 $ + * $Date: 2002/06/21 23:56:48 $ + * $Revision: 1.7 $ * ***************************************************************************************** */ package com.ibm.icu.text; -import java.text.BreakIterator; import java.text.CharacterIterator; -import java.text.CollationElementIterator; -import java.text.Collator; -import java.text.RuleBasedCollator; import java.text.StringCharacterIterator; import java.util.Locale; +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.impl.NormalizerImpl; /** - *StringSearch
is aSearchIterator
that provides - * language-sensitive text searching based on the comparison rules defined - * in a {@link RuleBasedCollator} object. - * Instances ofStringSearch
function as iterators - * maintain a current position and scan over text returning the index of - * characters where the pattern occurs and the length of each match. + *+ *
*StringSearch
is the concrete subclass of + *SearchIterator
that provides language-sensitive text searching + * based on the comparison rules defined in a {@link RuleBasedCollator} object. + **
*StringSearch
uses a version of the fast Boyer-Moore search * algorithm that has been adapted to work with the large character set of - * Unicode. See "Efficient Text Searching in Java", to be published in - * Java Report in February, 1999, for further information on the algorithm. + * Unicode. Refer to + * + * "Efficient Text Searching in Java", published in the + * Java Report on February, 1999, for further information on the + * algorithm. + *- * Consult the
+ *SearchIterator
documentation for information on - * and examples of how to use instances of this class to implement text - * searching.SearchIterator
provides all of the necessary
- * API; this class only provides constructors and internal implementation
- * methods.
- * 
+ * Users are also strongly encouraged to read the section on
+ * <a href="http://oss.software.ibm.com/icu/userguide/searchString.html">
+ * String Search</a> and
+ * <a href="http://oss.software.ibm.com/icu/userguide/Collate_Intro.html">
+ * Collation</a> in the user guide before attempting to use this class.
+ * </p>
+ * <p>
+ * String searching gets a little complicated when accents are encountered at
+ * match boundaries. If a match is found and it has preceding or trailing
+ * accents not part of the match, the result returned will include the
+ * preceding accents up to the first base character, if the pattern searched
+ * for starts with an accent. Likewise,
+ * if the pattern ends with an accent, all trailing accents up to the first
+ * base character will be included in the result.
+ * </p>
+ *+ * For example, if a match is found in target text "a\u0325\u0300" for + * the pattern + * "a\u0325", the result returned by StringSearch will be the index 0 and + * length 3 <0, 3>. If a match is found in the target + * "a\u0325\u0300" + * for the pattern "\u0300", then the result will be index 1 and length 2 + * <1, 2>. + *
+ * <p>
+ * In the case where the decomposition mode is on for the RuleBasedCollator,
+ * all matches that start or end with an accent will have their results
+ * include preceding or following accents respectively. For example, if
+ * pattern "a" is looked for in the target text "á\u0325", the result will be
+ * index 0 and length 2 <0, 2>.
+ * </p>
+ *+ * The StringSearch class provides two options to handle accent matching + * described below: + *
+ *+ * Let S' be the sub-string of a text string S between the offsets start and + * end <start, end>. + *
+ * A pattern string P matches a text string S at the offsets <start, + * length> + *
+ * if + *+ * option 1. P matches some canonical equivalent string of S'. Suppose the + * RuleBasedCollator used for searching has a collation strength of + * TERTIARY, all accents are non-ignorable. If the pattern + * "a\u0300" is searched in the target text + * "a\u0325\u0300", + * a match will be found, since the target text is canonically + * equivalent to "a\u0300\u0325" + * option 2. P matches S' and if P starts or ends with a combining mark, + * there exists no non-ignorable combining mark before or after S’ + * in S respectively. Following the example above, the pattern + * "a\u0300" will not find a match in "a\u0325\u0300", + * since + * there exists a non-ignorable accent '\u0325' in the middle of + * 'a' and '\u0300'. Even with a target text of + * "a\u0300\u0325" a match will not be found because of the + * non-ignorable trailing accent \u0325. + *+ * Option 2. will be the default mode for dealing with boundary accents unless + * specified via the API setCanonical(boolean). + * One restriction is to be noted for option 1. Currently there are no + * composite characters that consists of a character with combining class > 0 + * before a character with combining class == 0. However, if such a character + * exists in the future, the StringSearch may not work correctly with option 1 + * when such characters are encountered. + * + *+ * SearchIterator provides APIs to specify the starting position + * within the text string to be searched, e.g. setIndex, + * preceding and following. Since the starting position will + * be set as it is specified, please take note that there are some dangerous + * positions which the search may render incorrect results: + *
+ *
+ * + *- The midst of a substring that requires decomposition. + *
- If the following match is to be found, the position should not be the + * second character which requires to be swapped with the preceding + * character. Vice versa, if the preceding match is to be found, + * position to search from should not be the first character which + * requires to be swapped with the next character. E.g certain Thai and + * Lao characters require swapping. + *
- If a following pattern match is to be found, any position within a + * contracting sequence except the first will fail. Vice versa if a + * preceding pattern match is to be found, a invalid starting point + * would be any character within a contracting sequence except the last. + *
+ * Though collator attributes will be taken into consideration while + * performing matches, there are no APIs provided in StringSearch for setting + * and getting the attributes. These attributes can be set by getting the + * collator from getCollator and using the APIs in + * com.ibm.icu.text.Collator. To update StringSearch to the new + * collator attributes, reset() or + * setCollator(RuleBasedCollator) has to be called. + *
+ *+ * Consult the + * + * String Search user guide and the
* @see SearchIterator - * @see java.text.RuleBasedCollator - * - * @author Laura Werner - * @version 1.0 + * @see RuleBasedCollator + * @author Laura Werner, synwee + * @since 1.0 */ +// internal notes: all methods do not guarantee the correct status of the +// characteriterator. the caller has to maintain the original index position +// if necessary. methods could change the index position as it deems fit public final class StringSearch extends SearchIterator { + + // public constructors -------------------------------------------------- + /** - * Construct aSearchIterator
+ * documentation for more information and examples of use. + *StringSearch
object using a specific collator and set - * of boundary-detection rules. - *- * @param pat The text for which this object will search. - * - * @param target The text in which to search for the pattern. - * - * @param coll A
RuleBasedCollator
object which defines the - * language-sensitive comparison rules used to determine - * whether text in the pattern and target matches. - * - * @param breaker ABreakIterator
object used to constrain the matches - * that are found. Matches whose start and end indices - * in the target text are not boundaries as determined - * by theBreakIterator
are ignored. If this behavior - * is not desired,null
can be passed in instead. + * Initializes the iterator to use the language-specific rules defined in + * the argument collator to search for argument pattern in the argument + * target text. The argument breakiter is used to define logical matches. + * See super class documentation for more details on the use of the target + * text and BreakIterator. + * @param pattern text to look for. + * @param target target text to search for pattern. + * @param collator RuleBasedCollator that defines the language rules + * @param breaker A {@link BreakIterator} that is used to determine the + * boundaries of a logical match. This argument can be null. + * @exception IllegalArgumentException thrown when argument target is null, + * or of length 0 + * @see BreakIterator + * @see RuleBasedCollator + * @see SearchIterator */ - public StringSearch(String pat, CharacterIterator target, - RuleBasedCollator coll, BreakIterator breaker) { - super(target, breaker); - - pattern = pat; - collator = coll; - strength = coll.getStrength(); - iter = collator.getCollationElementIterator(target); + public StringSearch(String pattern, CharacterIterator target, + RuleBasedCollator collator, BreakIterator breakiter) + { + super(target, breakiter); + m_textBeginOffset_ = targetText.getBeginIndex(); + m_collator_ = collator; + m_colEIter_ = m_collator_.getCollationElementIterator(target); + m_utilColEIter_ = collator.getCollationElementIterator(""); + m_ceMask_ = getMask(m_collator_.getStrength()); + m_isCanonicalMatch_ = false; + m_pattern_ = new Pattern(pattern); + m_matchedIndex_ = DONE; - initialize(); // Initialize the Boyer-Moore tables + initialize(); } /** - * Construct aStringSearch
object using a specific collator. - *- * @param pattern The text for which this object will search. - * - * @param target The text in which to search for the pattern. - * - * @param collator A
RuleBasedCollator
object which defines the - * language-sensitive comparison rules used to determine - * whether text in the pattern and target matches. + * Initializes the iterator to use the language-specific rules defined in + * the argument collator to search for argument pattern in the argument + * target text. No BreakIterators are set to test for logical matches. + * @param pattern text to look for. + * @param target target text to search for pattern. + * @param collator RuleBasedCollator that defines the language rules + * @exception IllegalArgumentException thrown when argument target is null, + * or of length 0 + * @see RuleBasedCollator + * @see SearchIterator */ - public StringSearch(String pattern, - CharacterIterator target, - RuleBasedCollator collator) { + public StringSearch(String pattern, CharacterIterator target, + RuleBasedCollator collator) + { this(pattern, target, collator, BreakIterator.getCharacterInstance()); } /** - * Construct aStringSearch
object using the collator and - * character boundary detection rules for a given locale. - * @param pattern The text for which this object will search. - * - * @param target The text in which to search for the pattern. - * - * @param loc The locale whose collation and break-detection rules - * should be used. - * - * @exception ClassCastException thrown if the collator for the specified - * locale is not a RuleBasedCollator. + * Initializes the iterator to use the language-specific rules and + * break iterator rules defined in the argument locale to search for + * argument pattern in the argument target text. + * See super class documentation for more details on the use of the target + * text and BreakIterator. + * @param pattern text to look for. + * @param target target text to search for pattern. + * @param locale locale to use for language and break iterator rules + * @exception IllegalArgumentException thrown when argument target is null, + * or of length 0. ClassCastException thrown if the collator for + * the specified locale is not a RuleBasedCollator. + * @see BreakIterator + * @see RuleBasedCollator + * @see SearchIterator */ - public StringSearch(String pattern, CharacterIterator target, Locale loc) { - this(pattern, target, - (RuleBasedCollator) Collator.getInstance(loc), - BreakIterator.getCharacterInstance(loc)); + public StringSearch(String pattern, CharacterIterator target, Locale locale) + { + this(pattern, target, (RuleBasedCollator)Collator.getInstance(locale), + BreakIterator.getCharacterInstance(locale)); } /** - * Construct aStringSearch
object using the collator for the default - * locale. - * @param pattern The text for which this object will search. - * - * @param target The text in which to search for the pattern. - * - * @param collator ARuleBasedCollator
object which defines the - * language-sensitive comparison rules used to determine - * whether text in the pattern and target matches. + * Initializes the iterator to use the language-specific rules and + * break iterator rules defined in the default locale to search for + * argument pattern in the argument target text. + * See super class documentation for more details on the use of the target + * text and BreakIterator. + * @param pattern text to look for. + * @param target target text to search for pattern. + * @exception IllegalArgumentException thrown when argument target is null, + * or of length 0. ClassCastException thrown if the collator for + * the default locale is not a RuleBasedCollator. + * @see BreakIterator + * @see RuleBasedCollator + * @see SearchIterator */ - public StringSearch(String pattern, String target) { - this(pattern, - new StringCharacterIterator(target), + public StringSearch(String pattern, String target) + { + this(pattern, new StringCharacterIterator(target), (RuleBasedCollator)Collator.getInstance(), BreakIterator.getCharacterInstance()); } - //------------------------------------------------------------------- - // Getters and Setters - //------------------------------------------------------------------- + // public getters ----------------------------------------------------- /** - * Sets this object's strength property. The strength determines the - * minimum level of difference considered significant during a - * search. Generally, {@link Collator#TERTIARY} and - * {@link Collator#IDENTICAL} indicate that all differences are - * considered significant, {@link Collator#SECONDARY} indicates - * that upper/lower case distinctions should be ignored, and - * {@link Collator#PRIMARY} indicates that both case and accents - * should be ignored. However, the exact meanings of these constants - * are determined by individual Collator objects. 
- *- * @see java.text.Collator#PRIMARY - * @see java.text.Collator#SECONDARY - * @see java.text.Collator#TERTIARY - * @see java.text.Collator#IDENTICAL + * Returns the strength property of the RuleBasedCollator used in searching. + * See the RuleBasedCollator class documentation for a description of the + * strength property. + * @return the strength property of the RuleBasedCollator used in searching + * @see RuleBasedCollator + * @see #setStrength + * @see #getCollator + * @deprecated since release 2.2, user who would like to access the + * RuleBasedCollator strength, should retrieve the + * RuleBasedCollator via the API getCollator(), and use the + * Collator APIs to retrieve the strength. */ - public void setStrength(int newStrength) { - strength = newStrength; - + public int getStrength() { + return m_collator_.getStrength(); + } + + /** + *
+ * Gets the RuleBasedCollator used for the language rules. + *
+ *+ * Since StringSearch depends on the returned RuleBasedCollator, any + * changes to the RuleBasedCollator result should follow with a call to + * either StringSearch.reset() or + * StringSearch.setCollator(RuleBasedCollator) to ensure the correct + * search behaviour. + *
+ * @return RuleBasedCollator used by this StringSearch + * @see RuleBasedCollator + * @see #setCollator + */ + public RuleBasedCollator getCollator() + { + return m_collator_; + } + + /** + * Returns the pattern for which StringSearch is searching for. + * @return the pattern searched for + */ + public String getPattern() + { + return m_pattern_.targetText; + } + + /** + * Return the index in the target text where the iterator is currently + * positioned at. + * If the iteration has gone past the end of the target text or past + * the beginning for a backwards search, {@link #DONE} is returned. + * @return index in the target text where the iterator is currently + * positioned at + * @draft release 2.2 + */ + public int getIndex() + { + int result = m_colEIter_.getOffset(); + if (isOutOfBounds(m_textBeginOffset_, m_textLimitOffset_, result)) { + return DONE; + } + return result; + } + + /** + * Determines whether canonical matches (option 1, as described in the + * class documentation) is set. + * See setCanonical(boolean) for more information. + * @see #setCanonical + * @return true if canonical matches is set, false otherwise + * @draft release 2.2 + */ + public boolean isCanonical() + { + return m_isCanonicalMatch_; + } + + // public setters ----------------------------------------------------- + + /** + *+ * Sets the strength property of the RuleBasedCollator used for searching. + * See the Collator documentation for a description of the strengths. + *
+ * @deprecated since release 2.2, user who would like to modify the + * RuleBasedCollator, should retrieve the RuleBasedCollator + * via the API getCollator(), and use the Collator APIs to + * modify the strength. After which StringSearch.reset() + * or StringSearch.setCollator(RuleBasedCollator) should be + * called to update StringSearch. + * @see Collator + * @see Collator#PRIMARY + * @see Collator#SECONDARY + * @see Collator#TERTIARY + * @see Collator#QUATERNARY + * @see Collator#IDENTICAL + * @see #setCollator + * @see #getCollator + */ + public void setStrength(int newStrength) + { // Due to a bug (?) in CollationElementIterator, we must set the // collator's strength as well, since the iterator is going to // mask out the portions of the collation element that are not // relevant for the collator's current strength setting // Note that this makes it impossible to share a Collator among // multiple StringSearch objects if you adjust Strength settings. - collator.setStrength(strength); + m_collator_.setStrength(newStrength); initialize(); } - /** - * Returns this object's strength property, which indicates what level - * of differences are considered significant during a search. *- * @see #setStrength - */ - public int getStrength() { - return strength; - } - - /** - * Set the collator to be used for this string search. Also changes - * the search strength to match that of the new collator. + * Sets the RuleBasedCollator to be used for language-specific searching. + *
** This method causes internal data such as Boyer-Moore shift tables * to be recalculated, but the iterator's position is unchanged. - *
+ *
+ * @param collator to use for this StringSearch + * @exception IllegalArgumentException thrown when collator is null * @see #getCollator */ - public void setCollator(RuleBasedCollator coll) { - collator = coll; - strength = collator.getStrength(); - - // Also need to recompute the pattern and get a new target iterator - iter = collator.getCollationElementIterator(getTarget()); + public void setCollator(RuleBasedCollator collator) + { + if (collator == null) { + throw new IllegalArgumentException("Collator can not be null"); + } + m_collator_ = collator; + m_ceMask_ = getMask(m_collator_.getStrength()); + // if status is a failure, ucol_getAttribute returns UCOL_DEFAULT initialize(); + m_colEIter_.setCollator(m_collator_); + m_utilColEIter_.setCollator(m_collator_); } /** - * Return the RuleBasedCollator being used for this string search. - */ - public RuleBasedCollator getCollator() { - return collator; - } - - /** - * Set the pattern for which to search. + *+ * Set the pattern to search for. + *
+ ** This method causes internal data such as Boyer-Moore shift tables * to be recalculated, but the iterator's position is unchanged. + *
+ * @param pattern for searching + * @see #getPattern + * @exception IllegalArgumentException thrown if pattern is null or of + * length 0 */ - public void setPattern(String pat) { - pattern = pat; + public void setPattern(String pattern) + { + if (pattern == null || pattern.length() <= 0) { + throw new IllegalArgumentException( + "Pattern to search for can not be null or of length 0"); + } + m_pattern_.targetText = pattern; initialize(); } /** - * Returns the pattern for which this object is searching. - */ - public String getPattern() { - return pattern; - } + * Set the target text to be searched. Text iteration will hence begin at + * the start of the text string. This method is useful if you want to + * re-use an iterator to search within a different body of text. + * @param text new text iterator to look for match, + * @exception IllegalArgumentException thrown when text is null or has + * 0 length + * @see #getTarget + * @draft release 2.2 + */ + public void setTarget(CharacterIterator text) + { + super.setTarget(text); + m_textBeginOffset_ = targetText.getBeginIndex(); + m_colEIter_.setText(targetText); + } /** - * Set the target text which should be searched and resets the - * iterator's position to point before the start of the new text. - * This method is useful if you want to re-use an iterator to - * search for the same pattern within a different body of text. - */ - public void setTarget(CharacterIterator target) { - super.setTarget(target); - - // Since we're caching a CollationElementIterator, recreate it - iter = collator.getCollationElementIterator(target); - } - - //------------------------------------------------------------------- - // Privates - //------------------------------------------------------------------- + *+ * Sets the position in the target text which the next search will start + * from to the argument. This method clears all previous states. + *
+ *+ * This method takes the argument position and sets the position in the + * target text accordingly, without checking if position is pointing to a + * valid starting point to begin searching. + *
+ *+ * Search positions that may render incorrect results are highlighted in + * the class documentation. + *
+ * @param position index to start next search from. + * @exception IndexOutOfBoundsException thrown if argument position is out + * of the target text range. + * @see #getIndex + * @draft release 2.2 + */ + public void setIndex(int position) + { + super.setIndex(position); + m_matchedIndex_ = DONE; + m_colEIter_.setExactOffset(position); + } + + /** + *+ * Set the canonical match mode. See class documentation for details. + * The default setting for this property is false. + *
+ * @param allowCanonical flag indicator if canonical matches are allowed + * @see #isCanonical + * @draft release 2.2 + */ + public void setCanonical(boolean allowCanonical) + { + m_isCanonicalMatch_ = allowCanonical; + if (m_isCanonicalMatch_ == true) { + if (m_canonicalPrefixAccents_ == null) { + m_canonicalPrefixAccents_ = new StringBuffer(); + } + else { + m_canonicalPrefixAccents_.delete(0, + m_canonicalPrefixAccents_.length()); + } + if (m_canonicalSuffixAccents_ == null) { + m_canonicalSuffixAccents_ = new StringBuffer(); + } + else { + m_canonicalSuffixAccents_.delete(0, + m_canonicalSuffixAccents_.length()); + } + } + } + + // public miscellaneous methods ----------------------------------------- + + /** + *+ * Resets the search iteration. All properties will be reset to the + * default value. + *
+ *+ * Search will begin at the start of the target text if a forward iteration + * is initiated before a backwards iteration. Otherwise if a + * backwards iteration is initiated before a forwards iteration, the search + * will begin at the end of the target text. + *
+ *+ * Canonical match option will be reset to false, ie an exact match. + *
+ * @draft release 2.2 + */ + public void reset() + { + // reset is setting the attributes that are already in string search, + // hence all attributes in the collator should be retrieved without any + // problems + super.reset(); + m_isCanonicalMatch_ = false; + m_ceMask_ = getMask(m_collator_.getStrength()); + // if status is a failure, ucol_getAttribute returns UCOL_DEFAULT + initialize(); + m_colEIter_.setCollator(m_collator_); + m_colEIter_.reset(); + m_utilColEIter_.setCollator(m_collator_); + } + // protected methods ----------------------------------------------------- + /** - * Search forward for matching text, starting at a given location. - * Clients should not call this method directly; instead they should call - * {@link SearchIterator#next}. *- * If a match is found, this method returns the index at which the match - * starts and calls {@link SearchIterator#setMatchLength} - * with the number of characters in the target - * text that make up the match. If no match is found, the method returns - *
DONE
and does not call setMatchLength. - *- * @param start The index in the target text at which the search starts. - * - * @return The index at which the matched text in the target starts, or DONE - * if no match was found. - *
- * @see SearchIterator#next - * @see SearchIterator#DONE + * Concrete method to provide the mechanism + * for finding the next forwards match in the target text. + * See super class documentation for its use. + *
+ * @param start index in the target text at which the forwards search + * should begin. + * @return the starting index of the next forwards match if found, DONE + * otherwise + * @see #handlePrevious(int) + * @see #DONE */ protected int handleNext(int start) { - CharacterIterator target = getTarget(); - - int mask = getMask(strength); - int done = CollationElementIterator.NULLORDER & mask; - - if (DEBUG) { - debug("-------------------------handleNext-----------------------------------"); - debug(""); - debug("strength=" + strength + ", mask=" + Integer.toString(mask,16) - + ", done=" + Integer.toString(done,16)); - debug("decomp=" + collator.getDecomposition()); - - debug("target.begin=" + getTarget().getBeginIndex()); - debug("target.end=" + getTarget().getEndIndex()); - debug("start = " + start); - } - - int index = start + minLen; - int matchEnd = 0; - - while (index <= target.getEndIndex()) - { - int patIndex = normLen; - int tval = 0, pval = 0; - boolean getP = true; - - iter.setOffset(index); - matchEnd = index; - - if (DEBUG) debug(" outer loop: patIndex=" + patIndex + ", index=" + index + ", iter offset= " + iter.getOffset()); - - while ((patIndex > 0 || getP == false) && iter.getOffset() > start) - { - if (DEBUG) { - debug(" inner loop: patIndex=" + patIndex + " iter=" + iter.getOffset()); - debug(" getP=" + getP); - } - - // Get the previous character in both the pattern and the target - tval = iter.previous() & mask; - - if (getP) pval = valueList[--patIndex]; - getP = true; - - if (DEBUG) debug(" pval=" + Integer.toString(pval,16) + ", tval=" + Integer.toString(tval,16)); - - if (tval == 0) { // skip tval, use same pval - if (DEBUG) debug(" tval is ignorable"); - getP = false; - } - else if (pval != tval) { // Mismatch, skip ahead - if (DEBUG) debug(" mismatch: skippping " + getShift(tval, patIndex)); - - index += getShift(tval, patIndex); - break; - } - else if (patIndex == 0) { - // The values matched, and we're at the beginning of the pattern, - // 
which means we matched the whole thing. - start = iter.getOffset(); - setMatchLength(matchEnd - start); - if (DEBUG) debug("Found match at index "+ start ); - return start; - } + if (m_pattern_.m_CELength_ == 0) { + matchLength = 0; + if (m_matchedIndex_ == DONE && start == m_textBeginOffset_) { + m_matchedIndex_ = start; + return m_matchedIndex_; } - if (DEBUG) { - debug(" end of inner loop: patIndex=" + patIndex + " iter=" + iter.getOffset()); - debug(" getP=" + getP); - } - if (index == matchEnd) { - // We hit the beginning of the text being searched, which is - // possible if it contains lots of ignorable characters. - // Advance one character and try again. - if (DEBUG) debug("hit beginning of target; advance by one"); - index++; - } - } - if (DEBUG) debug("Fell off end of outer loop; returning DONE"); - return DONE; + targetText.setIndex(start); + char ch = targetText.current(); + // ch can never be done, it is handled by next() + char ch2 = targetText.next(); + if (ch2 == CharacterIterator.DONE) { + m_matchedIndex_ = DONE; + } + else { + m_matchedIndex_ = targetText.getIndex(); + } + if (UTF16.isLeadSurrogate(ch) && UTF16.isTrailSurrogate(ch2)) { + targetText.next(); + m_matchedIndex_ = targetText.getIndex(); + } + } + else { + if (matchLength != 0) { + start += matchLength; + } + + // status checked below + if (m_isCanonicalMatch_) { + // can't use exact here since extra accents are allowed. + handleNextCanonical(start); + } + else { + handleNextExact(start); + } + } + targetText.setIndex(m_matchedIndex_); + return m_matchedIndex_; } - + /** - * Search backward for matching text ,starting at a given location. - * Clients should not call this method directly; instead they should call - *SearchIterator.previous()
, which this method overrides. - *- * If a match is found, this method returns the index at which the match - * starts and calls {@link SearchIterator#setMatchLength} - * with the number of characters in the target - * text that make up the match. If no match is found, the method returns - *
DONE
and does not call setMatchLength. - *- * @param start The index in the target text at which the search starts. - * - * @return The index at which the matched text in the target starts, or DONE - * if no match was found. - *
- * @see SearchIterator#previous - * @see SearchIterator#DONE - */ - protected int handlePrev(int start) + *
+ * Concrete method to provide the mechanism + * for finding the next backwards match in the target text. + * See super class documentation for its use. + *
+ * @param start index in the target text at which the backwards search + * should begin. + * @return the starting index of the next backwards match if found, DONE + * otherwise + * @see #handleNext(int) + * @see #DONE + */ + protected int handlePrevious(int start) { - int patLen = normLen; - int index = start - minLen; - - int mask = getMask(strength); - int done = CollationElementIterator.NULLORDER & mask; - - if (DEBUG) { - debug("-------------------------handlePrev-----------------------------------"); - debug(""); - debug("strength=" + strength + ", mask=" + Integer.toString(mask,16) - + ", done=" + Integer.toString(done,16)); - debug("decomp=" + collator.getDecomposition()); - - debug("target.begin=" + getTarget().getBeginIndex()); - debug("target.end=" + getTarget().getEndIndex()); - debug("start = " + start); - } - - while (index >= 0) { - int patIndex = 0; - int tval = 0, pval = 0; - boolean getP = true; - - iter.setOffset(index); - - if (DEBUG) debug(" outer loop: patIndex=" + patIndex + ", index=" + index); - - while ((patIndex < patLen || !getP) && iter.getOffset() < start) - { - if (DEBUG) { - debug(" inner loop: patIndex=" + patIndex + " iter=" + iter.getOffset()); - debug(" getP=" + getP); - } - tval = iter.next() & mask; - if (getP) pval = valueList[patIndex++]; - getP = true; - - if (DEBUG) debug(" pval=" + Integer.toString(pval,16) + ", tval=" + Integer.toString(tval,16)); - - if (tval == done) { - if (DEBUG) debug(" end of target; no match"); - return DONE; - } - else if (tval == 0) { - if (DEBUG) debug(" tval is ignorable"); - getP = false; - } - else if (pval != tval) { - // We didn't match this pattern. Skip ahead - if (DEBUG) debug(" mismatch: skippping " + getBackShift(tval, patIndex)); - - int shift = getBackShift(tval, patIndex); - index -= shift; - break; - } - else if (patIndex == patLen) { - // The elements matched and we're at the end of the pattern, - // which means we matched the whole thing. 
- setMatchLength(iter.getOffset() - index); - if (DEBUG) debug("Found match at index "+ start ); - return index; - } + if (m_pattern_.m_CELength_ == 0) { + matchLength = 0; + // start can never be DONE or 0, it is handled in previous + targetText.setIndex(start); + char ch = targetText.previous(); + if (ch == CharacterIterator.DONE) { + m_matchedIndex_ = DONE; } - if (DEBUG) { - debug(" end of inner loop: patIndex=" + patIndex + " iter=" + iter.getOffset()); - debug(" getP=" + getP); - } - if (iter.getOffset() >= start) { - // We hit the end of the text being searched, which is - // possible if it contains lots of ignorable characters. - // Back up one character and try again. - if (DEBUG) debug("hit end of target; back by one"); - index--; + else { + m_matchedIndex_ = targetText.getIndex(); + if (UTF16.isTrailSurrogate(ch)) { + if (UTF16.isLeadSurrogate(targetText.previous())) { + m_matchedIndex_ = targetText.getIndex(); + } + } + } + } + else { + if (m_isCanonicalMatch_) { + // can't use exact here since extra accents are allowed. + handlePreviousCanonical(start); + } + else { + handlePreviousExact(start); } } - if (DEBUG) debug("Fell off end of outer loop; returning DONE"); - return DONE; + + targetText.setIndex(m_matchedIndex_); + return m_matchedIndex_; } - /** - * Return a bitmask that will select only the portions of a collation - * element that are significant at the given strength level. 
- */ - private static final int getMask(int strength) { - switch (strength) { - case Collator.PRIMARY: - return 0xFFFF0000; - case Collator.SECONDARY: - return 0xFFFFFF00; - default: - return 0xFFFFFFFF; - } - } + // private static inner classes ---------------------------------------- - - //------------------------------------------------------------------------ - // Private Data - // - private CollationElementIterator iter; - private RuleBasedCollator collator; - private int strength; - - //------------------------------------------------------------------------ - // Everything from here on down is the data used to represent the - // Boyer-Moore shift tables and the code that generates and manipulates - // them. - // - private static final int MAX_TABLE = 256; // Size of the shift tables - - private int valueList[] = null; - private int shiftTable[] = new int[MAX_TABLE]; - private int backShiftTable[] = new int[MAX_TABLE]; - - private String pattern; // The pattern string - private int normLen = 0; // num. of collation elements in pattern. 
- private int minLen = 0; // Min of composed, decomposed versions - private int maxLen = 0; // Max - - private void initialize() { - if (DEBUG) { - debug("-------------------------initialize-----------------------------------"); - debug("pattern=" + pattern); - } + private static class Pattern + { + // protected methods ----------------------------------------------- + + /** + * Pattern string + */ + protected String targetText; + /** + * Array containing the collation elements of targetText + */ + protected int m_CE_[]; + /** + * Number of collation elements in m_CE_ + */ + protected int m_CELength_; + /** + * Flag indicator if targetText starts with an accent + */ + protected boolean m_hasPrefixAccents_; + /** + * Flag indicator if targetText ends with an accent + */ + protected boolean m_hasSuffixAccents_; + /** + * Default number of characters to shift for Boyer Moore + */ + protected int m_defaultShiftSize_; + /** + * Number of characters to shift for Boyer Moore, depending on the + * source text to search + */ + protected char m_shift_[]; + /** + * Number of characters to shift backwards for Boyer Moore, depending + * on the source text to search + */ + protected char m_backShift_[]; - CollationElementIterator iter = collator.getCollationElementIterator(pattern); - - int mask = getMask(strength); - - // See how many non-ignorable collation keys are in the text - normLen = 0; - int elem; - while ((elem = iter.next()) != CollationElementIterator.NULLORDER) + // protected constructors ------------------------------------------ + + /** + * Empty constructor + */ + protected Pattern(String pattern) { - if ((elem & mask) != 0) { - normLen++; - } + targetText = pattern; + m_CE_ = new int[INITIAL_ARRAY_SIZE_]; + m_CELength_ = 0; + m_hasPrefixAccents_ = false; + m_hasSuffixAccents_ = false; + m_defaultShiftSize_ = 1; + m_shift_ = new char[MAX_TABLE_SIZE_]; + m_backShift_ = new char[MAX_TABLE_SIZE_]; } + }; - // Save them all - valueList = new int[normLen]; - int 
expandLen = 0; - iter.reset(); - - for (int i = 0; i < normLen; i++) - { - elem = iter.next(); - - if ((elem & mask) != 0) { - valueList[i] = elem & mask; - - } - // Keep track of whether there are any expanding-character - // sequences that can result in one of the characters that's in - // the pattern. If there are, we have to reduce the shift - // distances calculated below to account for it. - expandLen += iter.getMaxExpansion(elem) - 1; - } - - // - // We need to remember the size of the composed and decomposed - // versions of the string. Standard Boyer-Moore shift calculations - // can be wrong by an amount up to that difference, since a small - // small number of characters in the pattern can map to a larger - // number in the text being searched, or vice-versa. - // - int uniLen = pattern.length(); - maxLen = Math.max(normLen, uniLen); - minLen = Math.min(normLen, uniLen) - expandLen; - - if (DEBUG) debug("normLen=" + normLen + ", expandLen=" + expandLen - + ", maxLen=" + maxLen + ", minLen=" + minLen); - - // Now initialize the shift tables - // - // NOTE: This is the most conservative way to build them. If we had a way - // of knowing that there were no expanding/contracting chars in the rules, - // we could get rid of the "- 1" in the shiftTable calculations. - // But all of the default collators have at least one expansion or - // contraction, so it probably doesn't matter anyway. 
- // - for (int i = 0; i < MAX_TABLE; i++) { - shiftTable[i] = backShiftTable[i] = minLen; - } - - for (int i = 0; i < normLen-1; i++) { - shiftTable[hash(valueList[i])] = Math.max(minLen - i - 1, 1); - } - shiftTable[hash(valueList[normLen-1])] = 1; - - for (int i = normLen - 1; i > 0; i--) { - backShiftTable[hash(valueList[i])] = i; - } - backShiftTable[hash(valueList[0])] = 1; - - if (DEBUG) dumpTables(); - } + // private data members ------------------------------------------------ + /** - * Method used by StringSearch to determine how far to the right to - * shift the pattern during a Boyer-Moore search. - * - * @param curValue The current value in the target text - * @param curIndex The index in the pattern at which we failed to match - * curValue in the target text. + * target text begin offset. Each targetText has a valid contiguous region + * to iterate and this data member is the offset to the first such + * character in the region. */ - private int getShift( int curValue, int curIndex ) { - int shiftAmt = shiftTable[hash(curValue)]; - - // if (minLen != maxLen) { - int adjust = normLen - curIndex; - // if (shiftAmt > adjust + 1) { - if (adjust > 1 && shiftAmt >= adjust) { - if (DEBUG) debug("getShift: adjusting by " + adjust); - // shiftAmt -= adjust; - shiftAmt -= adjust - 1; - } - // } - return shiftAmt; - } - + private int m_textBeginOffset_; /** - * Method used by StringSearch to determine how far to the left to - * shift the pattern during a reverse Boyer-Moore search. - * - * @param curValue The current value in the target text - * @param curIndex The index in the pattern at which we failed to match - * curValue in the target text. + * target text limit offset. Each targetText has a valid contiguous region + * to iterate and this data member is the offset to 1 after the last such + * character in the region. 
*/ - private int getBackShift( int curValue, int curIndex ) { - int shiftAmt = backShiftTable[hash(curValue)]; + private int m_textLimitOffset_; + /** + * Upon completion of a search, m_matchIndex_ will store starting offset in + * m_text for the match. The Value DONE is the default value. + * If we are not at the start of the text or the end of the text and + * m_matchedIndex_ is DONE it means that we can find any more matches in + * that particular direction + */ + private int m_matchedIndex_; + /** + * Current pattern to search for + */ + private Pattern m_pattern_; + /** + * Collator whose rules are used to perform the search + */ + private RuleBasedCollator m_collator_; + /** + * The collation element iterator for the text source. + */ + private CollationElementIterator m_colEIter_; + /** + * Utility collation element, used throughout program for temporary + * iteration. + */ + private CollationElementIterator m_utilColEIter_; + /** + * The mask used on the collation elements to retrieve the valid strength + * weight + */ + private int m_ceMask_; + /** + * Buffer storing accents during a canonical search + */ + private StringBuffer m_canonicalPrefixAccents_; + /** + * Buffer storing accents during a canonical search + */ + private StringBuffer m_canonicalSuffixAccents_; + /** + * Flag to indicate if canonical search is to be done. + * E.g looking for "a\u0300" in "a\u0318\u0300" will yield the match at 0. 
+ */ + private boolean m_isCanonicalMatch_; + /** + * Size of the shift tables + */ + private static final int MAX_TABLE_SIZE_ = 257; + /** + * Initial array size + */ + private static final int INITIAL_ARRAY_SIZE_ = 256; + /** + * Utility mask + */ + private static final int SECOND_LAST_BYTE_SHIFT_ = 8; + /** + * Utility mask + */ + private static final int LAST_BYTE_MASK_ = 0xff; + /** + * Utility buffer for return values and temporary storage + */ + private int m_utilBuffer_[] = new int[2]; - // if (minLen != maxLen) { - int adjust = curIndex; - // int adjust = normLen - (minLen - curIndex); - if (adjust > 1 && shiftAmt > adjust) { - shiftAmt -= adjust - 1; - } - /* - if (shiftAmt > adjust + 1) { - if (DEBUG) debug("getBackShift: adjusting by " + adjust); - shiftAmt -= adjust; - } - */ - // } - return shiftAmt; - } + // private methods ------------------------------------------------------- /** * Hash a collation element from its full size (32 bits) down into a * value that can be used as an index into the shift tables. Right * now we do a modulus by the size of the hash table. - * - * TODO: At some point I should experiment to see whether a slightly - * more complicated hash function gives us a better distribution - * on multilingual text. I doubt it will have much effect on - * performance, though. + * @param ce collation element + * @return collapsed version of the collation element */ - private static final int hash(int order) { - return CollationElementIterator.primaryOrder(order) % MAX_TABLE; + private static final int hash(int ce) + { + // the old value UCOL_PRIMARYORDER(ce) % MAX_TABLE_SIZE_ does not work + // well with the new collation where most of the latin 1 characters + // are of the value xx000xxx. their hashes will most of the time be 0 + // to be discussed on the hash algo. 
+ return CollationElementIterator.primaryOrder(ce) % MAX_TABLE_SIZE_; } - - - //------------------------------------------------------------------------- - // Debugging support... - //------------------------------------------------------------------------- - - static private final boolean DEBUG = false; - - static void debug(String str) { - System.out.println(str); - } - - void dumpTables() { - for (int i = 0; i < MAX_TABLE; i++) { - if (shiftTable[i] != minLen) { - debug("shift[" + Integer.toString(i,16) + "] = " + shiftTable[i]); + + /** + * Gets the fcd value for a character at the argument index. + * This method takes into accounts of the supplementary characters. + * Note this method changes the offset in the character iterator. + * @param str UTF16 string where character for fcd retrieval resides + * @param offset position of the character whose fcd is to be retrieved + * @return fcd value + */ + private static final char getFCD(CharacterIterator str, int offset) + { + str.setIndex(offset); + char ch = str.current(); + char result = NormalizerImpl.getFCD16(ch); + + if ((result != 0) && (str.getEndIndex() != offset + 1) && + UTF16.isLeadSurrogate(ch)) { + ch = str.next(); + if (UTF16.isTrailSurrogate(ch)) { + result = NormalizerImpl.getFCD16FromSurrogatePair(result, ch); + } else { + result = 0; + } + } + return result; + } + + /** + * Gets the fcd value for a character at the argument index. + * This method takes into accounts of the supplementary characters. 
+ * @param str UTF16 string where character for fcd retrieval resides + * @param offset position of the character whose fcd is to be retrieved + * @return fcd value + */ + private static final char getFCD(String str, int offset) + { + char ch = str.charAt(offset); + char result = NormalizerImpl.getFCD16(ch); + + if ((result != 0) && (str.length() != offset + 1) && + UTF16.isLeadSurrogate(ch)) { + ch = str.charAt(offset + 1); + if (UTF16.isTrailSurrogate(ch)) { + result = NormalizerImpl.getFCD16FromSurrogatePair(result, ch); + } else { + result = 0; + } + } + return result; + } + + /** + * Getting the modified collation elements taking into account the collation + * attributes + * @param ce + * @return the modified collation element + */ + private final int getCE(int ce) + { + // note for tertiary we can't use the collator->tertiaryMask, that + // is a preprocessed mask that takes into account case options. since + // we are only concerned with exact matches, we don't need that. + ce &= m_ceMask_; + + if (m_collator_.isAlternateHandling(true)) { + // alternate handling here, since only the 16 most significant + // digits is only used, we can safely do a compare without masking + // if the ce is a variable, we mask and get only the primary values + // no shifting to quartenary is required since all primary values + // less than variabletop will need to be masked off anyway. + if ((m_collator_.m_variableTopValue_ << 16) > ce) { + if (m_collator_.getStrength() == Collator.QUATERNARY) { + ce = CollationElementIterator.primaryOrder(ce); + } + else { + ce = CollationElementIterator.IGNORABLE; + } + } + } + + return ce; + } + + /** + * Appends a int to a int array, increasing the size of the array when + * we are out of space. 
+ * @param offset in array to append to + * @param value to append + * @param array to append to + * @return the array appended to, this could be a new and bigger array + */ + private static final int[] append(int offset, int value, int array[]) + { + if (offset >= array.length) { + int temp[] = new int[offset + INITIAL_ARRAY_SIZE_]; + System.arraycopy(array, 0, temp, 0, array.length); + array = temp; + } + array[offset] = value; + return array; + } + + /** + * Initializing the ce table for a pattern. Stores non-ignorable collation + * keys. Table size will be estimated by the size of the pattern text. + * Table expansion will be perform as we go along. Adding 1 to ensure that + * the table size definitely increases. + * Internal method, status assumed to be a success. + * @return total number of expansions + */ + private final int initializePatternCETable() + { + m_utilColEIter_.setText(m_pattern_.targetText); + + int offset = 0; + int result = 0; + int ce = m_utilColEIter_.next(); + + while (ce != CollationElementIterator.NULLORDER) { + int newce = getCE(ce); + if (newce != CollationElementIterator.IGNORABLE) { + m_pattern_.m_CE_ = append(offset, newce, m_pattern_.m_CE_); + offset ++; + } + result += m_utilColEIter_.getMaxExpansion(ce) - 1; + ce = m_utilColEIter_.next(); + } + + m_pattern_.m_CE_ = append(offset, 0, m_pattern_.m_CE_); + m_pattern_.m_CELength_ = offset; + + return result; + } + + /** + * Initializes the pattern struct. + * Internal method, status assumed to be success. + * @return expansionsize the total expansion size of the pattern + */ + private final int initializePattern() + { + m_pattern_.m_hasPrefixAccents_ = (getFCD(m_pattern_.targetText, 0) + >> SECOND_LAST_BYTE_SHIFT_) != 0; + m_pattern_.m_hasSuffixAccents_ = (getFCD(m_pattern_.targetText, + m_pattern_.targetText.length() + - 1) + & LAST_BYTE_MASK_) != 0; + // since intializePattern is an internal method status is a success. 
+ return initializePatternCETable(); + } + + /** + * Initializing shift tables, with the default values. + * If a corresponding default value is 0, the shift table is not set. + * @param shift table for forwards shift + * @param backshift table for backwards shift + * @param cetable table containing pattern ce + * @param cesize size of the pattern ces + * @param expansionsize total size of the expansions + * @param defaultforward the default forward value + * @param defaultbackward the default backward value + */ + private final void setShiftTable(char shift[], + char backshift[], + int cetable[], int cesize, + int expansionsize, + char defaultforward, + char defaultbackward) + { + // estimate the value to shift. to do that we estimate the smallest + // number of characters to give the relevant ces, ie approximately + // the number of ces minus their expansion, since expansions can come + // from a character. + for (int count = 0; count < MAX_TABLE_SIZE_; count ++) { + shift[count] = defaultforward; + } + cesize --; // down to the last index + for (int count = 0; count < cesize; count ++) { + // number of ces from right of array to the count + int temp = defaultforward - count - 1; + shift[hash(cetable[count])] = temp > 1 ? ((char)temp) : 1; + } + shift[hash(cetable[cesize])] = 1; + // for ignorables we just shift by one. see test examples. + shift[hash(0)] = 1; + + for (int count = 0; count < MAX_TABLE_SIZE_; count ++) { + backshift[count] = defaultbackward; + } + for (int count = cesize; count > 0; count --) { + // the original value count does not seem to work + backshift[hash(cetable[count])] = (char)(count > expansionsize ? + count - expansionsize : 1); + } + backshift[hash(cetable[0])] = 1; + backshift[hash(0)] = 1; + } + + /** + *Building of the pattern collation element list and the Boyer Moore + * StringSearch table.
+ *The canonical match will only be performed after the default match + * fails.
+ *For both cases we need to remember the size of the composed and + * decomposed versions of the string. Since the Boyer-Moore shift + * calculations shifts by a number of characters in the text and tries to + * match the pattern from that offset, the shift value can not be too large + * in case we miss some characters. To choose a right shift size, we + * estimate the NFC form of the and use its size as a shift guide. The NFC + * form should be the small possible representation of the pattern. Anyways, + * we'll err on the smaller shift size. Hence the calculation for + * minlength. Canonical match will be performed slightly differently. We'll + * split the pattern into 3 parts, the prefix accents (PA), the middle + * string bounded by the first and last base character (MS), the ending + * accents (EA). Matches will be done on MS first, and only when we match + * MS then some processing will be required for the prefix and end accents + * in order to determine if they match PA and EA. Hence the default shift + * values for the canonical match will take the size of either end's accent + * into consideration. Forwards search will take the end accents into + * consideration for the default shift values and the backwards search will + * take the prefix accents into consideration.
+ *If pattern has no non-ignorable ce, we return a illegal argument + * error.
+ */ + private final void initialize() + { + int expandlength = initializePattern(); + if (m_pattern_.m_CELength_ > 0) { + char minlength = (char)(m_pattern_.m_CELength_ > expandlength + ? m_pattern_.m_CELength_ - expandlength : 1); + m_pattern_.m_defaultShiftSize_ = minlength; + setShiftTable(m_pattern_.m_shift_, m_pattern_.m_backShift_, + m_pattern_.m_CE_, m_pattern_.m_CELength_, + expandlength, minlength, minlength); + } + else { + m_pattern_.m_defaultShiftSize_ = 0; + } + } + + /** + * Determine whether the search text bounded by the offset start and end is + * one or more whole units of text as determined by the breakiterator in + * StringSearch. + * @param start target text start offset + * @param end target text end offset + */ + private final boolean isBreakUnit(int start, int end) + { + if (breakIterator != null) { + int startindex = breakIterator.first(); + int endindex = breakIterator.last(); + + // out-of-range indexes are never boundary positions + if (start < startindex || start > endindex || end < startindex + || end > endindex) { + return false; + } + // otherwise, we can use following() on the position before the + // specified one and return true of the position we get back is the + // one the user specified + boolean result = (start == startindex + || breakIterator.following(start - 1) == start) + && (end == endindex + || breakIterator.following(end - 1) == end); + if (result) { + // iterates the individual ces + m_utilColEIter_.setText(targetText); + m_utilColEIter_.setExactOffset(start); + for (int count = 0; count < m_pattern_.m_CELength_; + count ++) { + if (getCE(m_utilColEIter_.next()) + != m_pattern_.m_CE_[count]) { + return false; + } + } + if (m_utilColEIter_.next() + != CollationElementIterator.NULLORDER + && m_utilColEIter_.getOffset() == end) { + // extra collation elements at the end of the match + return false; + } + } + return result; + } + return true; + } + + /** + * Getting the next base character offset if current offset is an 
accent, + * or the current offset if the current character contains a base character. + * accents the following base character will be returned + * @param text string + * @param textoffset current offset + * @param textlength length of text string + * @return the next base character or the current offset + * if the current character is contains a base character. + */ + private final int getNextBaseOffset(CharacterIterator text, + int textoffset) + { + if (textoffset < text.getEndIndex()) { + while (text.getIndex() < text.getEndIndex()) { + int result = textoffset; + if ((getFCD(text, textoffset ++) + >> SECOND_LAST_BYTE_SHIFT_) == 0) { + return result; + } + } + return text.getEndIndex(); + } + return textoffset; + } + + /** + * Gets the next base character offset depending on the string search + * pattern data + * @param textoffset one offset away from the last character + * to search for. + * @return start index of the next base character or the current offset + * if the current character is contains a base character. + */ + private final int getNextBaseOffset(int textoffset) + { + if (m_pattern_.m_hasSuffixAccents_ + && textoffset < m_textLimitOffset_) { + targetText.setIndex(textoffset); + targetText.previous(); + if ((getFCD(targetText, targetText.getIndex()) & LAST_BYTE_MASK_) != 0) { + return getNextBaseOffset(targetText, textoffset); } - } - for (int i = 0; i < MAX_TABLE; i++) { - if (backShiftTable[i] != minLen) { - debug("backShift[" + Integer.toString(i,16) + "] = " + backShiftTable[i]); - } - } + } + return textoffset; + } + + /** + * Shifting the collation element iterator position forward to prepare for + * a following match. If the last character is a unsafe character, we'll + * only shift by 1 to capture contractions, normalization etc. + * Internal method, status assumed to be success. + * @param textoffset start text position to do search + * @param ce the text ce which failed the match. 
+ * @param patternceindex index of the ce within the pattern ce buffer which + * failed the match + * @return final offset + */ + private int shiftForward(int textoffset, int ce, int patternceindex) + + { + if (isOverlapping()) { + if (textoffset > m_textBeginOffset_) { + textoffset ++; + } + else { + textoffset = m_pattern_.m_defaultShiftSize_; + } + } + else { + if (ce != CollationElementIterator.NULLORDER) { + int shift = m_pattern_.m_shift_[hash(ce)]; + // this is to adjust for characters in the middle of the + // substring for matching that failed. + int adjust = m_pattern_.m_CELength_ - patternceindex; + if (adjust > 1 && shift >= adjust) { + shift -= adjust - 1; + } + textoffset += shift; + } + else { + textoffset += m_pattern_.m_defaultShiftSize_; + } + } + + textoffset = getNextBaseOffset(textoffset); + // check for unsafe characters + // * if it is the start or middle of a contraction: to be done after + // a initial match is found + // * thai or lao base consonant character: similar to contraction + // * high surrogate character: similar to contraction + // * next character is a accent: shift to the next base character + return textoffset; + } + + /** + * Gets the offset to the next safe point in text. + * ie. not the middle of a contraction, swappable characters or + * supplementary characters. + * @param textoffset offset in string + * @param end offset in string + * @return offset to the next safe character + */ + private final int getNextSafeOffset(int textoffset, int end) + { + int result = textoffset; // first contraction character + targetText.setIndex(result); + while (result != end && + m_collator_.isUnsafe(targetText.current())) { + result ++; + targetText.setIndex(result); + } + return result; + } + + /** + * This checks for accents in the potential match started with a composite + * character. + * This is really painful... we have to check that composite character do + * not have any extra accents. 
We have to normalize the potential match and + * find the immediate decomposed character before the match. + * The first composite character would have been taken care of by the fcd + * checks in checkForwardExactMatch. + * This is the slow path after the fcd of the first character and + * the last character has been checked by checkForwardExactMatch and we + * determine that the potential match has extra non-ignorable preceding + * ces. + * E.g. looking for \u0301 acute in \u01FA A ring above and acute, + * checkExtraMatchAccent should fail since there is a middle ring in + * \u01FA Note here that accents checking are slow and cautioned in the API + * docs. + * Internal method, status assumed to be a success, caller should check + * status before calling this method + * @param start index of the potential unfriendly composite character + * @param end index of the potential unfriendly composite character + * @return true if there is non-ignorable accents before at the beginning + * of the match, false otherwise. 
+ */ + private final boolean checkExtraMatchAccents(int start, int end) + { + boolean result = false; + if (m_pattern_.m_hasPrefixAccents_) { + targetText.setIndex(start); + + if (UTF16.isLeadSurrogate(targetText.next())) { + if (!UTF16.isTrailSurrogate(targetText.next())) { + targetText.previous(); + } + } + // we are only concerned with the first composite character + String str = getString(targetText, start, end); + if (Normalizer.quickCheck(str, Normalizer.NFD) + == Normalizer.NO) { + int safeoffset = getNextSafeOffset(start, end); + if (safeoffset != end) { + safeoffset ++; + } + String decomp = Normalizer.decompose( + str.substring(0, safeoffset - start), false); + m_utilColEIter_.setText(decomp); + int firstce = m_pattern_.m_CE_[0]; + boolean ignorable = true; + int ce = CollationElementIterator.IGNORABLE; + int offset = 0; + while (ce != firstce) { + offset = m_utilColEIter_.getOffset(); + if (ce != firstce + && ce != CollationElementIterator.IGNORABLE) { + ignorable = false; + } + ce = m_utilColEIter_.next(); + } + m_utilColEIter_.setExactOffset(offset); // back up 1 to the + m_utilColEIter_.previous(); // right offset + offset = m_utilColEIter_.getOffset(); + result = !ignorable && (UCharacter.getCombiningClass( + UTF16.charAt(decomp, offset)) != 0); + } + } + + return result; + } + + /** + * Used by exact matches, checks if there are accents before the match. + * This is really painful... we have to check that composite characters at + * the start of the matches have to not have any extra accents. + * We check the FCD of the character first, if it starts with an accent and + * the first pattern ce does not match the first ce of the character, we + * bail. + * Otherwise we try normalizing the first composite + * character and find the immediate decomposed character before the match to + * see if it is an non-ignorable accent. 
+ * Now normalizing the first composite character is enough because we ensure + * that when the match is passed in here with extra beginning ces, the + * first or last ce that match has to occur within the first character. + * E.g. looking for \u0301 acute in \u01FA A ring above and acute, + * checkExtraMatchAccent should fail since there is a middle ring in \u01FA + * Note here that accents checking are slow and cautioned in the API docs. + * @param start offset + * @param end offset + * @return true if there are accents on either side of the match, + * false otherwise + */ + private final boolean hasAccentsBeforeMatch(int start, int end) + { + if (m_pattern_.m_hasPrefixAccents_) { + // we have been iterating forwards previously + boolean ignorable = true; + int firstce = m_pattern_.m_CE_[0]; + m_colEIter_.setExactOffset(start); + int ce = getCE(m_colEIter_.next()); + while (ce != firstce) { + if (ce != CollationElementIterator.IGNORABLE) { + ignorable = false; + } + ce = getCE(m_colEIter_.next()); + } + if (!ignorable && m_colEIter_.isInBuffer()) { + // within normalization buffer, discontiguous handled here + return true; + } + + // within text + boolean accent = (getFCD(targetText, start) >> SECOND_LAST_BYTE_SHIFT_) + != 0; + if (!accent) { + return checkExtraMatchAccents(start, end); + } + if (!ignorable) { + return true; + } + if (start > m_textBeginOffset_) { + targetText.setIndex(start); + targetText.previous(); + if ((getFCD(targetText, targetText.getIndex()) & LAST_BYTE_MASK_) + != 0) { + m_colEIter_.setExactOffset(start); + ce = m_colEIter_.previous(); + if (ce != CollationElementIterator.NULLORDER + && ce != CollationElementIterator.IGNORABLE) { + return true; + } + } + } + } + + return false; + } + + /** + * Used by exact matches, checks if there are accents bounding the match. + * Note this is the initial boundary check. If the potential match + * starts or ends with composite characters, the accents in those + * characters will be determined later. 
+ * Not doing backwards iteration here, since discontiguos contraction for + * backwards collation element iterator, use up too many characters. + * E.g. looking for \u030A ring in \u01FA A ring above and acute, + * should fail since there is a acute at the end of \u01FA + * Note here that accents checking are slow and cautioned in the API docs. + * @param start offset of match + * @param end end offset of the match + * @return true if there are accents on either side of the match, + * false otherwise + */ + private final boolean hasAccentsAfterMatch(int start, int end) + { + if (m_pattern_.m_hasSuffixAccents_) { + targetText.setIndex(end); + if (end > m_textBeginOffset_ + && UTF16.isTrailSurrogate(targetText.previous())) { + if (targetText.getIndex() > m_textBeginOffset_ && + !UTF16.isLeadSurrogate(targetText.previous())) { + targetText.next(); + } + } + if ((getFCD(targetText, targetText.getIndex()) & LAST_BYTE_MASK_) != 0) { + int firstce = m_pattern_.m_CE_[0]; + m_colEIter_.setExactOffset(start); + while (getCE(m_colEIter_.next()) != firstce) { + } + int count = 1; + while (count < m_pattern_.m_CELength_) { + if (getCE(m_colEIter_.next()) + == CollationElementIterator.IGNORABLE) { + count --; + } + count ++; + } + int ce = getCE(m_colEIter_.next()); + if (ce != CollationElementIterator.NULLORDER + && ce != CollationElementIterator.IGNORABLE) { + if (m_colEIter_.getOffset() <= end) { + return true; + } + if ((getFCD(targetText, end) >> SECOND_LAST_BYTE_SHIFT_) + != 0) { + return true; + } + } + } + } + return false; + } + + /** + * Checks if the offset runs out of the text string range + * @param textstart offset of the first character in the range + * @param textlimit limit offset of the text string range + * @param offset to test + * @return true if offset is out of bounds, false otherwise + */ + private static final boolean isOutOfBounds(int textstart, int textlimit, + int offset) + { + return offset < textstart || offset > textlimit; + } + + /** + * Checks 
for identical match + * @param strsrch string search data + * @param start offset of possible match + * @param end offset of possible match + * @return true if identical match is found + */ + private final boolean checkIdentical(int start, int end) + { + if (m_collator_.getStrength() != Collator.IDENTICAL) { + return true; + } + + String textstr = getString(targetText, start, end - start); + if (Normalizer.quickCheck(textstr, Normalizer.NFD) + == Normalizer.NO) { + textstr = Normalizer.decompose(textstr, false); + } + String patternstr = m_pattern_.targetText; + if (Normalizer.quickCheck(patternstr, Normalizer.NFD) + == Normalizer.NO) { + patternstr = Normalizer.decompose(patternstr, false); + } + return textstr.equals(patternstr); + } + + /** + * Checks to see if the match is repeated + * @param start new match start index + * @param end new match end index + * @return true if the the match is repeated, false otherwise + */ + private final boolean checkRepeatedMatch(int start, int end) + { + if (m_matchedIndex_ == DONE) { + return false; + } + int lastmatchlimit = m_matchedIndex_ + matchLength; + if (!isOverlapping()) { + return (start >= m_matchedIndex_ && start <= lastmatchlimit) + || (end >= m_matchedIndex_ && end <= lastmatchlimit); + + } + return start == m_matchedIndex_; + } + + /** + * Checks match for contraction. + * If the match ends with a partial contraction we fail. + * If the match starts too far off (because of backwards iteration) we try + * to chip off the extra characters depending on whether a breakiterator + * has been used. + * Temporary utility buffer used to return modified start and end. + * @param start offset of potential match, to be modified if necessary + * @param end offset of potential match, to be modified if necessary + * @return true if match passes the contraction test, false otherwise. 
+ */ + private final boolean checkNextExactContractionMatch(int start, int end) + { + // This part checks if either ends of the match contains potential + // contraction. If so we'll have to iterate through them + char endchar = 0; + if (end < m_textLimitOffset_) { + targetText.setIndex(end); + endchar = targetText.current(); + } + char poststartchar = 0; + if (start + 1 < m_textLimitOffset_) { + targetText.setIndex(start + 1); + poststartchar = targetText.current(); + } + if (m_collator_.isUnsafe(endchar) + || m_collator_.isUnsafe(poststartchar)) { + // expansion prefix, what's left to iterate + int bufferedCEOffset = m_colEIter_.m_CEBufferOffset_; + boolean hasBufferedCE = bufferedCEOffset > 0; + m_colEIter_.setExactOffset(start); + int temp = start; + while (bufferedCEOffset > 0) { + // getting rid of the redundant ce, caused by setOffset. + // since backward contraction/expansion may have extra ces if + // we are in the normalization buffer, hasAccentsBeforeMatch + // would have taken care of it. + // E.g. the character \u01FA will have an expansion of 3, but + // if we are only looking for acute and ring \u030A and \u0301, + // we'll have to skip the first ce in the expansion buffer. + m_colEIter_.next(); + if (m_colEIter_.getOffset() != temp) { + start = temp; + temp = m_colEIter_.getOffset(); + } + bufferedCEOffset --; + } + + int count = 0; + while (count < m_pattern_.m_CELength_) { + int ce = getCE(m_colEIter_.next()); + if (ce == CollationElementIterator.IGNORABLE) { + continue; + } + if (hasBufferedCE && count == 0 + && m_colEIter_.getOffset() != temp) { + start = temp; + temp = m_colEIter_.getOffset(); + } + if (ce != m_pattern_.m_CE_[count]) { + end ++; + end = getNextBaseOffset(end); + m_utilBuffer_[0] = start; + m_utilBuffer_[1] = end; + return false; + } + count ++; + } + } + m_utilBuffer_[0] = start; + m_utilBuffer_[1] = end; + return true; + } + + + /** + * Checks and sets the match information if found. + * Checks + *+ *
+ * Otherwise the offset will be shifted to the next character. + * The result m_matchIndex_ and m_matchLength_ will be set to the truncated + * more fitting result value. + * Uses the temporary utility buffer for storing the modified textoffset. + * @param textoffset offset in the collation element text. + * @return true if the match is valid, false otherwise + */ + private final boolean checkNextExactMatch(int textoffset) + { + int start = m_colEIter_.getOffset(); + if (!checkNextExactContractionMatch(start, textoffset)) { + // returns the modified textoffset + m_utilBuffer_[0] = m_utilBuffer_[1]; + return false; + } + + start = m_utilBuffer_[0]; + textoffset = m_utilBuffer_[1]; + // this totally matches, however we need to check if it is repeating + if (!isBreakUnit(start, textoffset) + || checkRepeatedMatch(start, textoffset) + || hasAccentsBeforeMatch(start, textoffset) + || !checkIdentical(start, textoffset) + || hasAccentsAfterMatch(start, textoffset)) { + textoffset ++; + textoffset = getNextBaseOffset(textoffset); + m_utilBuffer_[0] = textoffset; + return false; + } + + // totally match, we will get rid of the ending ignorables. 
+ m_matchedIndex_ = start; + matchLength = textoffset - start; + return true; + } + + /** + * Getting the previous base character offset, or the current offset if the + * current character is a base character + * @param text the source text to work on + * @param textoffset one offset after the current character + * @return the offset of the next character after the base character or the + * first composed character with accents + */ + private final int getPreviousBaseOffset(CharacterIterator text, + int textoffset) + { + if (textoffset > m_textBeginOffset_) { + while (true) { + int result = textoffset; + text.setIndex(result); + if (UTF16.isTrailSurrogate(text.previous())) { + if (text.getIndex() != text.getBeginIndex() && + !UTF16.isLeadSurrogate(text.previous())) { + text.next(); + } + } + textoffset = text.getIndex(); + char fcd = getFCD(text, textoffset); + if ((fcd >> SECOND_LAST_BYTE_SHIFT_) == 0) { + if ((fcd & LAST_BYTE_MASK_) != 0) { + return textoffset; + } + return result; + } + if (textoffset == m_textBeginOffset_) { + return m_textBeginOffset_; + } + } + } + return textoffset; + } + + /** + * Getting the indexes of the accents that are not blocked in the argument + * accent array + * @param accents accents in nfd. 
+ * @param accentsindex array to store the indexes of accents in accents that + * are not blocked + * @return the length of populated accentsindex + */ + private int getUnblockedAccentIndex(StringBuffer accents, + int accentsindex[]) + { + int index = 0; + int length = accents.length(); + int cclass = 0; + int result = 0; + while (index < length) { + int codepoint = UTF16.charAt(accents, index); + int tempclass = UCharacter.getCombiningClass(codepoint); + if (tempclass != cclass) { + cclass = tempclass; + accentsindex[result] = index; + result ++; + } + if (UCharacter.isSupplementary(codepoint)) { + index += 2; + } + else { + index ++; + } + } + accentsindex[result] = length; + return result; + } + + /** + * Appends 3 StringBuffer/CharacterIterator together into a destination + * string buffer. + * @param source1 string buffer + * @param source2 character iterator + * @param start2 start of the character iterator to merge + * @param end2 end of the character iterator to merge + * @param source3 string buffer + * @return appended string buffer + */ + private static final StringBuffer merge(StringBuffer source1, + CharacterIterator source2, + int start2, int end2, + StringBuffer source3) + { + StringBuffer result = new StringBuffer(); + if (source1 != null && source1.length() != 0) { + result.append(source1); + } + source2.setIndex(start2); + while (source2.getIndex() < end2) { + result.append(source2.current()); + source2.next(); + } + if (source3 != null && source3.length() != 0) { + result.append(source3); + } + return result; + } + + /** + * Running through a collation element iterator to see if the contents + * matches pattern in string search data + * @param coleiter collation element iterator to test + * @return true if a match if found, false otherwise + */ + private final boolean checkCollationMatch(CollationElementIterator coleiter) + { + int patternceindex = m_pattern_.m_CELength_; + int offset = 0; + while (patternceindex > 0) { + int ce = 
getCE(coleiter.next()); + if (ce == CollationElementIterator.IGNORABLE) { + continue; + } + if (ce != m_pattern_.m_CE_[offset]) { + return false; + } + offset ++; + patternceindex --; + } + return true; + } + + /** + * Rearranges the front accents to try matching. + * Prefix accents in the text will be grouped according to their combining + * class and the groups will be mixed and matched to try find the perfect + * match with the pattern. + * So for instance looking for "\u0301" in "\u030A\u0301\u0325" + * step 1: split "\u030A\u0301" into 6 other type of potential accent + * substrings "\u030A", "\u0301", "\u0325", "\u030A\u0301", + * "\u030A\u0325", "\u0301\u0325". + * step 2: check if any of the generated substrings matches the pattern. + * Internal method, status is assumed to be success, caller has to check + * status before calling this method. + * @param start first offset of the accents to start searching + * @param end start of the last accent set + * @return DONE if a match is not found, otherwise return the starting + * offset of the match. Note this start includes all preceding + * accents. + */ + private int doNextCanonicalPrefixMatch(int start, int end) + { + if ((getFCD(targetText, start) & LAST_BYTE_MASK_) == 0) { + // die... 
failed at a base character + return DONE; + } + + start = targetText.getIndex(); // index changed by fcd + int offset = getNextBaseOffset(targetText, start); + start = getPreviousBaseOffset(start); + + StringBuffer accents = new StringBuffer(); + String accentstr = getString(targetText, start, offset - start); + // normalizing the offensive string + if (Normalizer.quickCheck(accentstr, Normalizer.NFD) + == Normalizer.NO) { + accentstr = Normalizer.decompose(accentstr, false); + } + accents.append(accentstr); + + int accentsindex[] = new int[INITIAL_ARRAY_SIZE_]; + int accentsize = getUnblockedAccentIndex(accents, accentsindex); + int count = (2 << (accentsize - 1)) - 2; + while (count > 0) { + // copy the base characters + m_canonicalPrefixAccents_.delete(0, + m_canonicalPrefixAccents_.length()); + int k = 0; + for (; k < accentsindex[0]; k ++) { + m_canonicalPrefixAccents_.append(accents.charAt(k)); + } + // forming all possible canonical rearrangement by dropping + // sets of accents + for (int i = 0; i <= accentsize - 1; i ++) { + int mask = 1 << (accentsize - i - 1); + if ((count & mask) != 0) { + for (int j = accentsindex[i]; j < accentsindex[i + 1]; + j ++) { + m_canonicalPrefixAccents_.append(accents.charAt(j)); + } + } + } + StringBuffer match = merge(m_canonicalPrefixAccents_, + targetText, offset, end, + m_canonicalSuffixAccents_); + + // if status is a failure, ucol_setText does nothing. + // run the collator iterator through this match + m_utilColEIter_.setText(match.toString()); + if (checkCollationMatch(m_utilColEIter_)) { + return start; + } + count --; + } + return DONE; + } + + /** + * Gets the offset to the safe point in text before textoffset. + * ie. not the middle of a contraction, swappable characters or + * supplementary characters. 
+ * @param start offset in string + * @param textoffset offset in string + * @return offset to the previous safe character + */ + private final int getPreviousSafeOffset(int start, int textoffset) + { + int result = textoffset; // first contraction character + targetText.setIndex(textoffset); + while (result >= start && m_collator_.isUnsafe(targetText.previous())) { + result = targetText.getIndex(); + } + if (result != start) { + // the first contraction character is consider unsafe here + result = targetText.getIndex(); // originally result --; + } + return result; + } + + /** + * Take the rearranged end accents and tries matching. If match failed at + * a seperate preceding set of accents (seperated from the rearranged on by + * at least a base character) then we rearrange the preceding accents and + * tries matching again. + * We allow skipping of the ends of the accent set if the ces do not match. + * However if the failure is found before the accent set, it fails. + * Internal method, status assumed to be success, caller has to check + * status before calling this method. + * @param textoffset of the start of the rearranged accent + * @return DONE if a match is not found, otherwise return the starting + * offset of the match. Note this start includes all preceding + * accents. 
+ */ + private int doNextCanonicalSuffixMatch(int textoffset) + { + int safelength = 0; + StringBuffer safetext; + int safeoffset = m_textBeginOffset_; + + if (textoffset != m_textBeginOffset_ + && m_canonicalSuffixAccents_.length() > 0 + && m_collator_.isUnsafe(m_canonicalSuffixAccents_.charAt(0))) { + safeoffset = getPreviousSafeOffset(m_textBeginOffset_, + textoffset); + safelength = textoffset - safeoffset; + safetext = merge(null, targetText, safeoffset, textoffset, + m_canonicalSuffixAccents_); + } + else { + safetext = m_canonicalSuffixAccents_; + } + + // if status is a failure, ucol_setText does nothing + CollationElementIterator coleiter = m_utilColEIter_; + coleiter.setText(safetext.toString()); + // status checked in loop below + + int ceindex = m_pattern_.m_CELength_ - 1; + boolean isSafe = true; // indication flag for position in safe zone + + while (ceindex >= 0) { + int textce = coleiter.previous(); + if (textce == CollationElementIterator.NULLORDER) { + // check if we have passed the safe buffer + if (coleiter == m_colEIter_) { + return DONE; + } + coleiter = m_colEIter_; + if (safetext != m_canonicalSuffixAccents_) { + safetext.delete(0, safetext.length()); + } + coleiter.setExactOffset(safeoffset); + // status checked at the start of the loop + isSafe = false; + continue; + } + textce = getCE(textce); + if (textce != CollationElementIterator.IGNORABLE + && textce != m_pattern_.m_CE_[ceindex]) { + // do the beginning stuff + int failedoffset = coleiter.getOffset(); + if (isSafe && failedoffset >= safelength) { + // alas... no hope. 
failed at rearranged accent set + return DONE; + } + else { + if (isSafe) { + failedoffset += safeoffset; + } + + // try rearranging the front accents + int result = doNextCanonicalPrefixMatch(failedoffset, + textoffset); + if (result != DONE) { + // if status is a failure, ucol_setOffset does nothing + m_colEIter_.setExactOffset(result); + } + return result; + } + } + if (textce == m_pattern_.m_CE_[ceindex]) { + ceindex --; + } + } + // set offset here + if (isSafe) { + int result = coleiter.getOffset(); + // sets the text iterator with the correct expansion and offset + int leftoverces = coleiter.m_CEBufferOffset_; + if (result >= safelength) { + result = textoffset; + } + else { + result += safeoffset; + } + m_colEIter_.setExactOffset(result); + m_colEIter_.m_CEBufferOffset_ = leftoverces; + return result; + } + + return coleiter.getOffset(); + } + + /** + * Trying out the substring and sees if it can be a canonical match. + * This will try normalizing the end accents and arranging them into + * canonical equivalents and check their corresponding ces with the pattern + * ce. + * Suffix accents in the text will be grouped according to their combining + * class and the groups will be mixed and matched to try find the perfect + * match with the pattern. + * So for instance looking for "\u0301" in "\u030A\u0301\u0325" + * step 1: split "\u030A\u0301" into 6 other type of potential accent + * substrings + * "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325", + * "\u0301\u0325". + * step 2: check if any of the generated substrings matches the pattern. 
+ * @param textoffset end offset in the collation element text that ends with + * the accents to be rearranged + * @return true if the match is valid, false otherwise + */ + private boolean doNextCanonicalMatch(int textoffset) + { + int offset = m_colEIter_.getOffset(); + targetText.setIndex(textoffset); + if (UTF16.isTrailSurrogate(targetText.previous()) + && targetText.getIndex() > m_textBeginOffset_) { + if (!UTF16.isLeadSurrogate(targetText.previous())) { + targetText.next(); + } + } + if ((getFCD(targetText, targetText.getIndex()) & LAST_BYTE_MASK_) == 0) { + if (m_pattern_.m_hasPrefixAccents_) { + offset = doNextCanonicalPrefixMatch(offset, textoffset); + if (offset != DONE) { + m_colEIter_.setExactOffset(offset); + return true; + } + } + return false; + } + + if (!m_pattern_.m_hasSuffixAccents_) { + return false; + } + + StringBuffer accents = new StringBuffer(); + // offset to the last base character in substring to search + int baseoffset = getPreviousBaseOffset(targetText, textoffset); + // normalizing the offensive string + String accentstr = getString(targetText, baseoffset, + textoffset - baseoffset); + if (Normalizer.quickCheck(accentstr, Normalizer.NFD) + == Normalizer.NO) { + accentstr = Normalizer.decompose(accentstr, false); + } + accents.append(accentstr); + // status checked in loop below + + int accentsindex[] = new int[INITIAL_ARRAY_SIZE_]; + int size = getUnblockedAccentIndex(accents, accentsindex); + + // 2 power n - 1 minus the full set of accents + int count = (2 << (size - 1)) - 2; + while (count > 0) { + m_canonicalSuffixAccents_.delete(0, + m_canonicalSuffixAccents_.length()); + // copy the base characters + for (int k = 0; k < accentsindex[0]; k ++) { + m_canonicalSuffixAccents_.append(accents.charAt(k)); + } + // forming all possible canonical rearrangement by dropping + // sets of accents + for (int i = 0; i <= size - 1; i ++) { + int mask = 1 << (size - i - 1); + if ((count & mask) != 0) { + for (int j = accentsindex[i]; j < 
accentsindex[i + 1]; + j ++) { + m_canonicalSuffixAccents_.append(accents.charAt(j)); + } + } + } + offset = doNextCanonicalSuffixMatch(baseoffset); + if (offset != DONE) { + return true; // match found + } + count --; + } + return false; + } + + /** + * Gets the previous base character offset depending on the string search + * pattern data + * @param strsrch string search data + * @param textoffset current offset, current character + * @return the offset of the next character after this base character or + * itself if it is a composed character with accents + */ + private final int getPreviousBaseOffset(int textoffset) + { + if (m_pattern_.m_hasPrefixAccents_ && textoffset > m_textBeginOffset_) { + int offset = textoffset; + if ((getFCD(targetText, offset) >> SECOND_LAST_BYTE_SHIFT_) != 0) { + return getPreviousBaseOffset(targetText, textoffset); + } + } + return textoffset; + } + + /** + * Checks match for contraction. + * If the match ends with a partial contraction we fail. + * If the match starts too far off (because of backwards iteration) we try + * to chip off the extra characters. + * Uses the temporary util buffer for return values of the modified start + * and end. + * @param start offset of potential match, to be modified if necessary + * @param end offset of potential match, to be modified if necessary + * @return true if match passes the contraction test, false otherwise. + */ + private boolean checkNextCanonicalContractionMatch(int start, int end) + { + // This part checks if either ends of the match contains potential + // contraction. 
If so we'll have to iterate through them + char schar = 0; + char echar = 0; + if (end < m_textLimitOffset_) { + targetText.setIndex(end); + echar = targetText.current(); + } + if (start < m_textLimitOffset_) { + targetText.setIndex(start + 1); + schar = targetText.current(); + } + if (m_collator_.isUnsafe(echar) || m_collator_.isUnsafe(schar)) { + int expansion = m_colEIter_.m_CEBufferOffset_; + boolean hasExpansion = expansion > 0; + m_colEIter_.setExactOffset(start); + int temp = start; + while (expansion > 0) { + // getting rid of the redundant ce, caused by setOffset. + // since backward contraction/expansion may have extra ces if + // we are in the normalization buffer, hasAccentsBeforeMatch + // would have taken care of it. + // E.g. the character \u01FA will have an expansion of 3, but + // if we are only looking for acute and ring \u030A and \u0301, + // we'll have to skip the first ce in the expansion buffer. + m_colEIter_.next(); + if (m_colEIter_.getOffset() != temp) { + start = temp; + temp = m_colEIter_.getOffset(); + } + expansion --; + } + + int count = 0; + while (count < m_pattern_.m_CELength_) { + int ce = getCE(m_colEIter_.next()); + // status checked below, note that if status is a failure + // ucol_next returns UCOL_NULLORDER + if (ce == CollationElementIterator.IGNORABLE) { + continue; + } + if (hasExpansion && count == 0 + && m_colEIter_.getOffset() != temp) { + start = temp; + temp = m_colEIter_.getOffset(); + } + + if (count == 0 && ce != m_pattern_.m_CE_[0]) { + // accents may have extra starting ces, this occurs when a + // pure accent pattern is matched without rearrangement + // text \u0325\u0300 and looking for \u0300 + int expected = m_pattern_.m_CE_[0]; + if ((getFCD(targetText, start) & LAST_BYTE_MASK_) != 0) { + ce = getCE(m_colEIter_.next()); + while (ce != expected + && ce != CollationElementIterator.NULLORDER + && m_colEIter_.getOffset() <= end) { + ce = getCE(m_colEIter_.next()); + } + } + } + if (ce != 
m_pattern_.m_CE_[count]) { + end ++; + end = getNextBaseOffset(end); + m_utilBuffer_[0] = start; + m_utilBuffer_[1] = end; + return false; + } + count ++; + } + } + m_utilBuffer_[0] = start; + m_utilBuffer_[1] = end; + return true; + } + + /** + * Checks and sets the match information if found. + * Checks + *- the potential match does not repeat the previous match + *
- boundaries are correct + *
- exact matches have no extra accents +
- identical matches +
- potential match does not end in the middle of a contraction + *
+ *
+ * Otherwise the offset will be shifted to the next character. + * The result m_matchIndex_ and m_matchLength_ will be set to the truncated + * more fitting result value. + * Uses the temporary utility buffer for storing the modified textoffset. + * @param textoffset offset in the collation element text. + * @return true if the match is valid, false otherwise + */ + private boolean checkNextCanonicalMatch(int textoffset) + { + // to ensure that the start and ends are not composite characters + // if we have a canonical accent match + if ((m_pattern_.m_hasSuffixAccents_ + && m_canonicalSuffixAccents_.length() != 0) || + (m_pattern_.m_hasPrefixAccents_ + && m_canonicalPrefixAccents_.length() != 0)) { + m_matchedIndex_ = getPreviousBaseOffset(m_colEIter_.getOffset()); + matchLength = textoffset - m_matchedIndex_; + return true; + } + + int start = m_colEIter_.getOffset(); + if (!checkNextCanonicalContractionMatch(start, textoffset)) { + // return the modified textoffset + m_utilBuffer_[0] = m_utilBuffer_[1]; + return false; + } + start = m_utilBuffer_[0]; + textoffset = m_utilBuffer_[1]; + start = getPreviousBaseOffset(start); + // this totally matches, however we need to check if it is repeating + if (checkRepeatedMatch(start, textoffset) + || !isBreakUnit(start, textoffset) + || !checkIdentical(start, textoffset)) { + textoffset ++; + textoffset = getNextBaseOffset(targetText, textoffset); + m_utilBuffer_[0] = textoffset; + return false; + } + + m_matchedIndex_ = start; + matchLength = textoffset - start; + return true; + } + + /** + * Shifting the collation element iterator position forward to prepare for + * a preceding match. If the first character is a unsafe character, we'll + * only shift by 1 to capture contractions, normalization etc. + * @param textoffset start text position to do search + * @param ce the text ce which failed the match. 
+ * @param patternceindex index of the ce within the pattern ce buffer which + * failed the match + * @return final offset + */ + private int reverseShift(int textoffset, int ce, int patternceindex) + { + if (isOverlapping()) { + if (textoffset != m_textLimitOffset_) { + textoffset --; + } + else { + textoffset -= m_pattern_.m_defaultShiftSize_; + } + } + else { + if (ce != CollationElementIterator.NULLORDER) { + int shift = m_pattern_.m_backShift_[hash(ce)]; + + // this is to adjust for characters in the middle of the substring + // for matching that failed. + int adjust = patternceindex; + if (adjust > 1 && shift > adjust) { + shift -= adjust - 1; + } + textoffset -= shift; + } + else { + textoffset -= m_pattern_.m_defaultShiftSize_; + } + } + + textoffset = getPreviousBaseOffset(textoffset); + return textoffset; + } + + /** + * Checks match for contraction. + * If the match starts with a partial contraction we fail. + * Uses the temporary utility buffer to return the modified start and end. + * @param start offset of potential match, to be modified if necessary + * @param end offset of potential match, to be modified if necessary + * @return true if match passes the contraction test, false otherwise. + */ + private boolean checkPreviousExactContractionMatch(int start, int end) + { + // This part checks if either ends of the match contains potential + // contraction. 
If so we'll have to iterate through them + char echar = 0; + if (end < m_textLimitOffset_) { + targetText.setIndex(end); + echar = targetText.current(); + } + char schar = 0; + if (start + 1 < m_textLimitOffset_) { + targetText.setIndex(start + 1); + schar = targetText.current(); + } + if (m_collator_.isUnsafe(echar) || m_collator_.isUnsafe(schar)) { + // expansion suffix, what's left to iterate + int expansion = m_colEIter_.m_CEBufferSize_ + - m_colEIter_.m_CEBufferOffset_; + boolean hasExpansion = expansion > 0; + m_colEIter_.setExactOffset(end); + int temp = end; + while (expansion > 0) { + // getting rid of the redundant ce + // since forward contraction/expansion may have extra ces + // if we are in the normalization buffer, hasAccentsBeforeMatch + // would have taken care of it. + // E.g. the character \u01FA will have an expansion of 3, but if + // we are only looking for A ring A\u030A, we'll have to skip the + // last ce in the expansion buffer + m_colEIter_.previous(); + if (m_colEIter_.getOffset() != temp) { + end = temp; + temp = m_colEIter_.getOffset(); + } + expansion --; + } + + int count = m_pattern_.m_CELength_; + while (count > 0) { + int ce = getCE(m_colEIter_.previous()); + // status checked below, note that if status is a failure + // ucol_previous returns UCOL_NULLORDER + if (ce == CollationElementIterator.IGNORABLE) { + continue; + } + if (hasExpansion && count == 0 + && m_colEIter_.getOffset() != temp) { + end = temp; + temp = m_colEIter_.getOffset(); + } + if (ce != m_pattern_.m_CE_[count - 1]) { + start --; + start = getPreviousBaseOffset(targetText, start); + m_utilBuffer_[0] = start; + m_utilBuffer_[1] = end; + return false; + } + count --; + } + } + m_utilBuffer_[0] = start; + m_utilBuffer_[1] = end; + return true; + } + + /** + * Checks and sets the match information if found. + * Checks + *- the potential match does not repeat the previous match + *
- boundaries are correct + *
- potential match does not end in the middle of a contraction + *
- identical matches + *
+ *
+ * Otherwise the offset will be shifted to the preceding character. + * Uses the temporary utility buffer to store the modified textoffset. + * @param textoffset offset in the collation element text. the returned value + * will be the truncated start offset of the match or the new start + * search offset. + * @return true if the match is valid, false otherwise + */ + private final boolean checkPreviousExactMatch(int textoffset) + { + // to ensure that the start and ends are not composite characters + int end = m_colEIter_.getOffset(); + if (!checkPreviousExactContractionMatch(textoffset, end)) { + return false; + } + textoffset = m_utilBuffer_[0]; + end = m_utilBuffer_[1]; + + // this totally matches, however we need to check if it is repeating + // the old match + if (checkRepeatedMatch(textoffset, end) + || !isBreakUnit(textoffset, end) + || hasAccentsBeforeMatch(textoffset, end) + || !checkIdentical(textoffset, end) + || hasAccentsAfterMatch(textoffset, end)) { + textoffset --; + textoffset = getPreviousBaseOffset(targetText, textoffset); + m_utilBuffer_[0] = textoffset; + return false; + } + m_matchedIndex_ = textoffset; + matchLength = end - textoffset; + return true; + } + + /** + * Rearranges the end accents to try matching. + * Suffix accents in the text will be grouped according to their combining + * class and the groups will be mixed and matched to try find the perfect + * match with the pattern. + * So for instance looking for "\u0301" in "\u030A\u0301\u0325" + * step 1: split "\u030A\u0301" into 6 other type of potential accent + * substrings + * "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325", + * "\u0301\u0325". + * step 2: check if any of the generated substrings matches the pattern. + * @param start offset of the first base character + * @param end start of the last accent set + * @return DONE if a match is not found, otherwise return the ending + * offset of the match. Note this start includes all following + * accents. 
+ */ + private int doPreviousCanonicalSuffixMatch(int start, int end) + { + targetText.setIndex(end); + if (UTF16.isTrailSurrogate(targetText.previous()) + && targetText.getIndex() > m_textBeginOffset_) { + if (!UTF16.isLeadSurrogate(targetText.previous())) { + targetText.next(); + } + } + if ((getFCD(targetText, targetText.getIndex()) & LAST_BYTE_MASK_) == 0) { + // die... failed at a base character + return DONE; + } + end = getNextBaseOffset(targetText, end); + + StringBuffer accents = new StringBuffer(); + int offset = getPreviousBaseOffset(targetText, end); + // normalizing the offensive string + String accentstr = getString(targetText, offset, end - offset); + if (Normalizer.quickCheck(accentstr, Normalizer.NFD) + == Normalizer.NO) { + accentstr = Normalizer.decompose(accentstr, false); + } + accents.append(accentstr); + + int accentsindex[] = new int[INITIAL_ARRAY_SIZE_]; + int accentsize = getUnblockedAccentIndex(accents, accentsindex); + int count = (2 << (accentsize - 1)) - 2; + while (count > 0) { + m_canonicalSuffixAccents_.delete(0, + m_canonicalSuffixAccents_.length()); + // copy the base characters + for (int k = 0; k < accentsindex[0]; k ++) { + m_canonicalSuffixAccents_.append(accents.charAt(k)); + } + // forming all possible canonical rearrangement by dropping + // sets of accents + for (int i = 0; i <= accentsize - 1; i ++) { + int mask = 1 << (accentsize - i - 1); + if ((count & mask) != 0) { + for (int j = accentsindex[i]; j < accentsindex[i + 1]; + j ++) { + m_canonicalSuffixAccents_.append(accents.charAt(j)); + } + } + } + StringBuffer match = merge(m_canonicalPrefixAccents_, targetText, + start, offset, + m_canonicalSuffixAccents_); + // run the collator iterator through this match + // if status is a failure ucol_setText does nothing + m_utilColEIter_.setText(match.toString()); + if (checkCollationMatch(m_utilColEIter_)) { + return end; + } + count --; + } + return DONE; + } + + /** + * Take the rearranged start accents and tries matching. 
If match failed at + * a seperate following set of accents (seperated from the rearranged on by + * at least a base character) then we rearrange the preceding accents and + * tries matching again. + * We allow skipping of the ends of the accent set if the ces do not match. + * However if the failure is found before the accent set, it fails. + * Internal method, status assumed to be success, caller has to check + * status before calling this method. + * @param textoffset of the ends of the rearranged accent + * @return DONE if a match is not found, otherwise return the ending offset + * of the match. Note this start includes all following accents. + */ + private int doPreviousCanonicalPrefixMatch(int textoffset) + { + int safelength = 0; + StringBuffer safetext; + int safeoffset = textoffset; + + if (textoffset > m_textBeginOffset_ + && m_collator_.isUnsafe(m_canonicalPrefixAccents_.charAt( + m_canonicalPrefixAccents_.length() - 1))) { + safeoffset = getNextSafeOffset(textoffset, m_textLimitOffset_); + safelength = safeoffset - textoffset; + safetext = merge(m_canonicalPrefixAccents_, targetText, textoffset, + safeoffset, null); + } + else { + safetext = m_canonicalPrefixAccents_; + } + + // if status is a failure, ucol_setText does nothing + CollationElementIterator coleiter = m_utilColEIter_; + coleiter.setText(safetext.toString()); + // status checked in loop below + + int ceindex = 0; + boolean isSafe = true; // safe zone indication flag for position + int prefixlength = m_canonicalPrefixAccents_.length(); + + while (ceindex < m_pattern_.m_CELength_) { + int textce = coleiter.next(); + if (textce == CollationElementIterator.NULLORDER) { + // check if we have passed the safe buffer + if (coleiter == m_colEIter_) { + return DONE; + } + if (safetext != m_canonicalPrefixAccents_) { + safetext.delete(0, safetext.length()); + } + coleiter = m_colEIter_; + coleiter.setExactOffset(safeoffset); + // status checked at the start of the loop + isSafe = false; + continue; + 
} + textce = getCE(textce); + if (textce != CollationElementIterator.IGNORABLE + && textce != m_pattern_.m_CE_[ceindex]) { + // do the beginning stuff + int failedoffset = coleiter.getOffset(); + if (isSafe && failedoffset <= prefixlength) { + // alas... no hope. failed at rearranged accent set + return DONE; + } + else { + if (isSafe) { + failedoffset = safeoffset - failedoffset; + if (safetext != m_canonicalPrefixAccents_) { + safetext.delete(0, safetext.length()); + } + } + + // try rearranging the end accents + int result = doPreviousCanonicalSuffixMatch(textoffset, + failedoffset); + if (result != DONE) { + // if status is a failure, ucol_setOffset does nothing + m_colEIter_.setExactOffset(result); + } + return result; + } + } + if (textce == m_pattern_.m_CE_[ceindex]) { + ceindex ++; + } + } + // set offset here + if (isSafe) { + int result = coleiter.getOffset(); + // sets the text iterator here with the correct expansion and offset + int leftoverces = coleiter.m_CEBufferSize_ + - coleiter.m_CEBufferOffset_; + if (result <= prefixlength) { + result = textoffset; + } + else { + result = textoffset + (safeoffset - result); + } + m_colEIter_.setExactOffset(result); + m_colEIter_.m_CEBufferOffset_ = m_colEIter_.m_CEBufferSize_ + - leftoverces; + return result; + } + + return coleiter.getOffset(); + } + + /** + * Trying out the substring and sees if it can be a canonical match. + * This will try normalizing the starting accents and arranging them into + * canonical equivalents and check their corresponding ces with the pattern + * ce. + * Prefix accents in the text will be grouped according to their combining + * class and the groups will be mixed and matched to try find the perfect + * match with the pattern. + * So for instance looking for "\u0301" in "\u030A\u0301\u0325" + * step 1: split "\u030A\u0301" into 6 other type of potential accent + * substrings + * "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325", + * "\u0301\u0325". 
+ * step 2: check if any of the generated substrings matches the pattern. + * @param textoffset start offset in the collation element text that starts + * with the accents to be rearranged + * @return true if the match is valid, false otherwise + */ + private boolean doPreviousCanonicalMatch(int textoffset) + { + int offset = m_colEIter_.getOffset(); + if ((getFCD(targetText, textoffset) >> SECOND_LAST_BYTE_SHIFT_) == 0) { + if (m_pattern_.m_hasSuffixAccents_) { + offset = doPreviousCanonicalSuffixMatch(textoffset, offset); + if (offset != DONE) { + m_colEIter_.setExactOffset(offset); + return true; + } + } + return false; + } + + if (!m_pattern_.m_hasPrefixAccents_) { + return false; + } + + StringBuffer accents = new StringBuffer(); + // offset to the last base character in substring to search + int baseoffset = getNextBaseOffset(targetText, textoffset); + // normalizing the offensive string + String textstr = getString(targetText, textoffset, + baseoffset - textoffset); + if (Normalizer.quickCheck(textstr, Normalizer.NFD) + == Normalizer.NO) { + textstr = Normalizer.decompose(textstr, false); + } + accents.append(textstr); + // status checked in loop + + int accentsindex[] = new int[INITIAL_ARRAY_SIZE_]; + int size = getUnblockedAccentIndex(accents, accentsindex); + + // 2 power n - 1 minus the full set of accents + int count = (2 << (size - 1)) - 2; + while (count > 0) { + m_canonicalPrefixAccents_.delete(0, + m_canonicalPrefixAccents_.length()); + // copy the base characters + for (int k = 0; k < accentsindex[0]; k ++) { + m_canonicalPrefixAccents_.append(accents.charAt(k)); + } + // forming all possible canonical rearrangement by dropping + // sets of accents + for (int i = 0; i <= size - 1; i ++) { + int mask = 1 << (size - i - 1); + if ((count & mask) != 0) { + for (int j = accentsindex[i]; j < accentsindex[i + 1]; + j ++) { + m_canonicalPrefixAccents_.append(accents.charAt(j)); + } + } + } + offset = doPreviousCanonicalPrefixMatch(baseoffset); + if (offset 
!= DONE) { + return true; // match found + } + count --; + } + return false; + } + + /** + * Checks match for contraction. + * If the match starts with a partial contraction we fail. + * Uses the temporary utility buffer to return the modified start and end. + * @param start offset of potential match, to be modified if necessary + * @param end offset of potential match, to be modified if necessary + * @return true if match passes the contraction test, false otherwise. + */ + private boolean checkPreviousCanonicalContractionMatch(int start, int end) + { + int temp = end; + // This part checks if either ends of the match contains potential + // contraction. If so we'll have to iterate through them + char echar = 0; + char schar = 0; + if (end < m_textLimitOffset_) { + targetText.setIndex(end); + echar = targetText.current(); + } + if (start + 1 < m_textLimitOffset_) { + targetText.setIndex(start + 1); + schar = targetText.current(); + } + if (m_collator_.isUnsafe(echar) || m_collator_.isUnsafe(schar)) { + int expansion = m_colEIter_.m_CEBufferSize_ + - m_colEIter_.m_CEBufferOffset_; + boolean hasExpansion = expansion > 0; + m_colEIter_.setExactOffset(end); + while (expansion > 0) { + // getting rid of the redundant ce + // since forward contraction/expansion may have extra ces + // if we are in the normalization buffer, hasAccentsBeforeMatch + // would have taken care of it. + // E.g. 
the character \u01FA will have an expansion of 3, but + // if we are only looking for A ring A\u030A, we'll have to + // skip the last ce in the expansion buffer + m_colEIter_.previous(); + if (m_colEIter_.getOffset() != temp) { + end = temp; + temp = m_colEIter_.getOffset(); + } + expansion --; + } + + int count = m_pattern_.m_CELength_; + while (count > 0) { + int ce = getCE(m_colEIter_.previous()); + // status checked below, note that if status is a failure + // previous() returns NULLORDER + if (ce == CollationElementIterator.IGNORABLE) { + continue; + } + if (hasExpansion && count == 0 + && m_colEIter_.getOffset() != temp) { + end = temp; + temp = m_colEIter_.getOffset(); + } + if (count == m_pattern_.m_CELength_ + && ce != m_pattern_.m_CE_[m_pattern_.m_CELength_ - 1]) { + // accents may have extra starting ces, this occurs when a + // pure accent pattern is matched without rearrangement + int expected = m_pattern_.m_CE_[m_pattern_.m_CELength_ - 1]; + targetText.setIndex(end); + if (UTF16.isTrailSurrogate(targetText.previous())) { + if (targetText.getIndex() > m_textBeginOffset_ && + !UTF16.isLeadSurrogate(targetText.previous())) { + targetText.next(); + } + } + end = targetText.getIndex(); + if ((getFCD(targetText, end) & LAST_BYTE_MASK_) != 0) { + ce = getCE(m_colEIter_.previous()); + while (ce != expected + && ce != CollationElementIterator.NULLORDER + && m_colEIter_.getOffset() <= start) { + ce = getCE(m_colEIter_.previous()); + } + } + } + if (ce != m_pattern_.m_CE_[count - 1]) { + start --; + start = getPreviousBaseOffset(start); + m_utilBuffer_[0] = start; + m_utilBuffer_[1] = end; + return false; + } + count --; + } + } + m_utilBuffer_[0] = start; + m_utilBuffer_[1] = end; + return true; + } + + /** + * Checks and sets the match information if found. + * Checks + *- the current match does not repeat the last match + *
- boundaries are correct + *
- exact matches have no extra accents + * <li>
- identical matches + *
+ *
+ * Otherwise the offset will be shifted to the next character. + * Uses the temporary utility buffer for storing the modified textoffset. + * @param textoffset offset in the collation element text. the returned + * value will be the truncated start offset of the match or the + * new start search offset. + * @return true if the match is valid, false otherwise + */ + private boolean checkPreviousCanonicalMatch(int textoffset) + { + // to ensure that the start and ends are not composite characters + // if we have a canonical accent match + if (m_pattern_.m_hasSuffixAccents_ + && m_canonicalSuffixAccents_.length() != 0 + || m_pattern_.m_hasPrefixAccents_ + && m_canonicalPrefixAccents_.length() != 0) { + m_matchedIndex_ = textoffset; + matchLength = getNextBaseOffset(m_colEIter_.getOffset()) + - textoffset; + return true; + } + + int end = m_colEIter_.getOffset(); + if (!checkPreviousCanonicalContractionMatch(textoffset, end)) { + // storing the modified textoffset + return false; + } + textoffset = m_utilBuffer_[0]; + end = m_utilBuffer_[1]; + end = getNextBaseOffset(end); + // this totally matches, however we need to check if it is repeating + if (checkRepeatedMatch(textoffset, end) + || !isBreakUnit(textoffset, end) + || !checkIdentical(textoffset, end)) { + textoffset --; + textoffset = getPreviousBaseOffset(textoffset); + m_utilBuffer_[0] = textoffset; + return false; + } + + m_matchedIndex_ = textoffset; + matchLength = end - textoffset; + return true; + } + + /** + * Method that does the next exact match + * @param start the offset to start shifting from and performing the + * next exact match + */ + private void handleNextExact(int start) + { + int textoffset = shiftForward(start, + CollationElementIterator.NULLORDER, + m_pattern_.m_CELength_); + int targetce = CollationElementIterator.IGNORABLE; + while (textoffset <= m_textLimitOffset_) { + m_colEIter_.setExactOffset(textoffset); + int patternceindex = m_pattern_.m_CELength_ - 1; + boolean found = false; + 
int lastce = CollationElementIterator.NULLORDER; + + while (true) { + // finding the last pattern ce match, imagine composite + // characters. for example: search for pattern A in text \u00C0 + // we'll have to skip \u0300 the grave first before we get to A + targetce = m_colEIter_.previous(); + if (targetce == CollationElementIterator.NULLORDER) { + found = false; + break; + } + targetce = getCE(targetce); + if (targetce == CollationElementIterator.IGNORABLE && + m_colEIter_.isInBuffer()) { + // this is for the text \u0315\u0300 that requires + // normalization and pattern \u0300, where \u0315 is ignorable + continue; + } + if (lastce == CollationElementIterator.NULLORDER + || lastce == CollationElementIterator.IGNORABLE) { + lastce = targetce; + } + if (targetce == m_pattern_.m_CE_[patternceindex]) { + // the first ce can be a contraction + found = true; + break; + } + if (m_colEIter_.m_CEBufferOffset_ <= 0) { + found = false; + break; + } + } + + targetce = lastce; + + while (found && patternceindex > 0) { + targetce = m_colEIter_.previous(); + if (targetce == CollationElementIterator.NULLORDER) { + found = false; + break; + } + targetce = getCE(targetce); + if (targetce == CollationElementIterator.IGNORABLE) { + continue; + } + + patternceindex --; + found = found && targetce == m_pattern_.m_CE_[patternceindex]; + } + + if (!found) { + textoffset = shiftForward(textoffset, targetce, + patternceindex); + // status checked at loop. 
+ patternceindex = m_pattern_.m_CELength_; + continue; + } + + if (checkNextExactMatch(textoffset)) { + // status checked in ucol_setOffset + return; + } + textoffset = m_utilBuffer_[0]; + } + setMatchNotFound(); + } + + /** + * Method that does the next canonical match + * @param start the offset to start shifting from and performing the + * next canonical match + */ + private void handleNextCanonical(int start) + { + boolean hasPatternAccents = + m_pattern_.m_hasSuffixAccents_ || m_pattern_.m_hasPrefixAccents_; + + // shifting it check for setting offset + // if setOffset is called previously or there was no previous match, we + // leave the offset as it is. + int textoffset = shiftForward(start, CollationElementIterator.NULLORDER, + m_pattern_.m_CELength_); + m_canonicalPrefixAccents_.delete(0, m_canonicalPrefixAccents_.length()); + m_canonicalSuffixAccents_.delete(0, m_canonicalSuffixAccents_.length()); + int targetce = CollationElementIterator.IGNORABLE; + + while (textoffset <= m_textLimitOffset_) + { + m_colEIter_.setExactOffset(textoffset); + int patternceindex = m_pattern_.m_CELength_ - 1; + boolean found = false; + int lastce = CollationElementIterator.NULLORDER; + + while (true) { + // finding the last pattern ce match, imagine composite characters + // for example: search for pattern A in text \u00C0 + // we'll have to skip \u0300 the grave first before we get to A + targetce = m_colEIter_.previous(); + if (targetce == CollationElementIterator.NULLORDER) { + found = false; + break; + } + targetce = getCE(targetce); + if (lastce == CollationElementIterator.NULLORDER + || lastce == CollationElementIterator.IGNORABLE) { + lastce = targetce; + } + if (targetce == m_pattern_.m_CE_[patternceindex]) { + // the first ce can be a contraction + found = true; + break; + } + if (m_colEIter_.m_CEBufferOffset_ <= 0) { + found = false; + break; + } + } + targetce = lastce; + + while (found && patternceindex > 0) { + targetce = m_colEIter_.previous(); + if (targetce == 
CollationElementIterator.NULLORDER) { + found = false; + break; + } + targetce = getCE(targetce); + if (targetce == CollationElementIterator.IGNORABLE) { + continue; + } + + patternceindex --; + found = found && targetce == m_pattern_.m_CE_[patternceindex]; + } + + // initializing the rearranged accent array + if (hasPatternAccents && !found) { + found = doNextCanonicalMatch(textoffset); + } + + if (!found) { + textoffset = shiftForward(textoffset, targetce, patternceindex); + // status checked at loop + patternceindex = m_pattern_.m_CELength_; + continue; + } + + if (checkNextCanonicalMatch(textoffset)) { + return; + } + textoffset = m_utilBuffer_[0]; + } + setMatchNotFound(); + } + + /** + * Method that does the previous exact match + * @param start the offset to start shifting from and performing the + * previous exact match + */ + private void handlePreviousExact(int start) + { + int textoffset = reverseShift(start, CollationElementIterator.NULLORDER, + m_pattern_.m_CELength_); + while (textoffset >= m_textBeginOffset_) + { + m_colEIter_.setExactOffset(textoffset); + int patternceindex = 1; + int targetce = CollationElementIterator.IGNORABLE; + boolean found = false; + int firstce = CollationElementIterator.NULLORDER; + + while (true) { + // finding the first pattern ce match, imagine composite + // characters. 
for example: search for pattern \u0300 in text + // \u00C0, we'll have to skip A first before we get to + // \u0300 the grave accent + targetce = m_colEIter_.next(); + if (targetce == CollationElementIterator.NULLORDER) { + found = false; + break; + } + targetce = getCE(targetce); + if (firstce == CollationElementIterator.NULLORDER + || firstce == CollationElementIterator.IGNORABLE) { + firstce = targetce; + } + if (targetce == CollationElementIterator.IGNORABLE) { + continue; + } + if (targetce == m_pattern_.m_CE_[0]) { + found = true; + break; + } + if (m_colEIter_.m_CEBufferOffset_ == -1 + || m_colEIter_.m_CEBufferOffset_ + == m_colEIter_.m_CEBufferSize_) { + // checking for accents in composite character + found = false; + break; + } + } + + targetce = firstce; + + while (found && patternceindex < m_pattern_.m_CELength_) { + targetce = m_colEIter_.next(); + if (targetce == CollationElementIterator.NULLORDER) { + found = false; + break; + } + targetce = getCE(targetce); + if (targetce == CollationElementIterator.IGNORABLE) { + continue; + } + + found = found && targetce == m_pattern_.m_CE_[patternceindex]; + patternceindex ++; + } + + if (!found) { + textoffset = reverseShift(textoffset, targetce, patternceindex); + patternceindex = 0; + continue; + } + + if (checkPreviousExactMatch(textoffset)) { + return; + } + textoffset = m_utilBuffer_[0]; + } + setMatchNotFound(); + } + + /** + * Method that does the previous canonical match + * @param start the offset to start shifting from and performing the + * previous canonical match + */ + private void handlePreviousCanonical(int start) + { + boolean hasPatternAccents = + m_pattern_.m_hasSuffixAccents_ || m_pattern_.m_hasPrefixAccents_; + + // shifting it check for setting offset + // if setOffset is called previously or there was no previous match, we + // leave the offset as it is. 
+ int textoffset = reverseShift(start, CollationElementIterator.NULLORDER, + m_pattern_.m_CELength_); + m_canonicalPrefixAccents_.delete(0, m_canonicalPrefixAccents_.length()); + m_canonicalSuffixAccents_.delete(0, m_canonicalSuffixAccents_.length()); + + while (textoffset >= m_textBeginOffset_) + { + m_colEIter_.setExactOffset(textoffset); + int patternceindex = 1; + int targetce = CollationElementIterator.IGNORABLE; + boolean found = false; + int firstce = CollationElementIterator.NULLORDER; + + while (true) { + // finding the first pattern ce match, imagine composite + // characters. for example: search for pattern \u0300 in text + // \u00C0, we'll have to skip A first before we get to + // \u0300 the grave accent + targetce = m_colEIter_.next(); + if (targetce == CollationElementIterator.NULLORDER) { + found = false; + break; + } + targetce = getCE(targetce); + if (firstce == CollationElementIterator.NULLORDER + || firstce == CollationElementIterator.IGNORABLE) { + firstce = targetce; + } + + if (targetce == m_pattern_.m_CE_[0]) { + // the first ce can be a contraction + found = true; + break; + } + if (m_colEIter_.m_CEBufferOffset_ == -1 + || m_colEIter_.m_CEBufferOffset_ + == m_colEIter_.m_CEBufferSize_) { + // checking for accents in composite character + found = false; + break; + } + } + + targetce = firstce; + + while (found && patternceindex < m_pattern_.m_CELength_) { + targetce = m_colEIter_.next(); + if (targetce == CollationElementIterator.NULLORDER) { + found = false; + break; + } + targetce = getCE(targetce); + if (targetce == CollationElementIterator.IGNORABLE) { + continue; + } + + found = found && targetce == m_pattern_.m_CE_[patternceindex]; + patternceindex ++; + } + + // initializing the rearranged accent array + if (hasPatternAccents && !found) { + found = doPreviousCanonicalMatch(textoffset); + } + + if (!found) { + textoffset = reverseShift(textoffset, targetce, patternceindex); + patternceindex = 0; + continue; + } + + if 
(checkPreviousCanonicalMatch(textoffset)) { + return; + } + textoffset = m_utilBuffer_[0]; + } + setMatchNotFound(); + } + + /** + * Gets a substring out of a CharacterIterator + * @param text CharacterIterator + * @param start start offset + * @param length of substring + * @return substring from text starting at start and length length + */ + private static final String getString(CharacterIterator text, int start, + int length) + { + StringBuffer result = new StringBuffer(length); + int offset = text.getIndex(); + text.setIndex(start); + for (int i = 0; i < length; i ++) { + result.append(text.current()); + text.next(); + } + text.setIndex(offset); + return result.toString(); + } + + /** + * Getting the mask for collation strength + * @param strength collation strength + * @return collation element mask + */ + private static final int getMask(int strength) + { + switch (strength) + { + case Collator.PRIMARY: + return RuleBasedCollator.CE_PRIMARY_MASK_; + case Collator.SECONDARY: + return RuleBasedCollator.CE_SECONDARY_MASK_ + | RuleBasedCollator.CE_PRIMARY_MASK_; + default: + return RuleBasedCollator.CE_TERTIARY_MASK_ + | RuleBasedCollator.CE_SECONDARY_MASK_ + | RuleBasedCollator.CE_PRIMARY_MASK_; + } + } + + /** + * Sets match not found + */ + private void setMatchNotFound() + { + // this method resets the match result regardless of the error status. + m_matchedIndex_ = DONE; + setMatchLength(0); } -}; +}- the potential match does not repeat the previous match + *
- boundaries are correct + *
- potential match does not end in the middle of a contraction + *
- identical matches + *