From 9172f8ef1276d1476ec2c2d2eca8ffef1cfd316f Mon Sep 17 00:00:00 2001 From: Syn Wee Quek Date: Thu, 16 May 2002 20:04:49 +0000 Subject: [PATCH] ICU-1897 First round of bug report from the Chinese team fixed. * Unbalanced string length comparison * decomposition mode * hash code calculation X-SVN-Rev: 8640 --- .../icu/text/CollationElementIterator.java | 66 ++++++-- icu4j/src/com/ibm/icu/text/CollationKey.java | 10 +- icu4j/src/com/ibm/icu/text/Collator.java | 20 +-- .../src/com/ibm/icu/text/CollatorReader.java | 6 +- .../com/ibm/icu/text/RuleBasedCollator.java | 151 +++++++++++------- 5 files changed, 161 insertions(+), 92 deletions(-) diff --git a/icu4j/src/com/ibm/icu/text/CollationElementIterator.java b/icu4j/src/com/ibm/icu/text/CollationElementIterator.java index 1ac958fb1ef..9eb2ffa5ccc 100755 --- a/icu4j/src/com/ibm/icu/text/CollationElementIterator.java +++ b/icu4j/src/com/ibm/icu/text/CollationElementIterator.java @@ -100,6 +100,12 @@ public final class CollationElementIterator */ public int getOffset() { + if (m_bufferOffset_ != -1) { + if (m_isForwards_) { + return m_FCDLimit_; + } + return m_FCDStart_; + } return m_source_.getIndex(); } @@ -237,7 +243,8 @@ public final class CollationElementIterator * while iterating (i.e., call next() and then call previous(), or call * previous() and then call next()), you'll get back the same element * twice.

- * @return the previous collation element + * @return the previous collation element, or NULLORDER when the start of + * the iteration has been reached. * @draft 2.2 */ public synchronized int previous() @@ -415,13 +422,28 @@ public final class CollationElementIterator m_source_.setIndex(0); updateInternalState(); } - + + // public miscellaneous methods ----------------------------------------- + // protected data members ----------------------------------------------- /** * true if current codepoint was Hiragana */ protected boolean m_isCodePointHiragana_; + /** + * Position in the original string that starts with a non-FCD sequence + */ + protected int m_FCDStart_; + /** + * This is the CE from CEs buffer that should be returned. + * Initial value is 0. + * Forwards iteration will end with m_CEBufferOffset_ == m_CEBufferSize_, + * backwards will end with m_CEBufferOffset_ == 0. + * The next/previous after we reach the end/beginning of the m_CEBuffer_ + * will cause this value to be reset to 0. + */ + protected int m_CEBufferOffset_; // protected constructors ----------------------------------------------- @@ -464,6 +486,31 @@ public final class CollationElementIterator updateInternalState(); } + // protected methods ---------------------------------------------------- + + /** + * Checks if iterator is in the buffer zone + * @return true if iterator is in buffer zone, false otherwise + */ + protected boolean isInBuffer() + { + return m_bufferOffset_ != -1; + } + + /** + * Checks if there are any more buffered CEs to be returned. + * @return true if there are more buffered CEs to be returned. 
+ */ + protected boolean hasBufferedCE() + { + if (m_isForwards_) { + // m_CEBufferOffset_ is never negative + // if there is no expansion, m_CEBufferSize_ = 0 + return m_CEBufferOffset_ < m_CEBufferSize_; + } + return m_CEBufferOffset_ > 0; + } + // private data members ------------------------------------------------- // private inner class -------------------------------------------------- @@ -523,13 +570,12 @@ public final class CollationElementIterator /** * This is position to the m_buffer_, -1 if iterator is not in m_buffer_ */ - private int m_bufferOffset_; - /** - * This is the CE from CEs buffer that should be returned - */ - private int m_CEBufferOffset_; + private int m_bufferOffset_; /** - * This is the position to which we have stored processed CEs + * This is the position to which we have stored processed CEs. + * Initial value is 0. + * The next/previous after we reach the end/beginning of the m_CEBuffer_ + * will cause this value to be reset to 0. */ private int m_CEBufferSize_; /** @@ -541,10 +587,6 @@ public final class CollationElementIterator * Position in the original string to continue forward FCD check from. 
*/ private int m_FCDLimit_; - /** - * Position in the original string that starts with a non-FCD sequence - */ - private int m_FCDStart_; /** * The collator this iterator is based on */ diff --git a/icu4j/src/com/ibm/icu/text/CollationKey.java b/icu4j/src/com/ibm/icu/text/CollationKey.java index 1385431f3d2..f3b2480b58d 100755 --- a/icu4j/src/com/ibm/icu/text/CollationKey.java +++ b/icu4j/src/com/ibm/icu/text/CollationKey.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollationKey.java,v $ -* $Date: 2002/05/14 16:48:49 $ -* $Revision: 1.4 $ +* $Date: 2002/05/16 20:04:49 $ +* $Revision: 1.5 $ * ******************************************************************************* */ @@ -216,13 +216,13 @@ public final class CollationKey implements Comparable StringBuffer key = new StringBuffer(size); int i = 0; while (m_key_[i] != 0 && m_key_[i + 1] != 0) { - key.append((m_key_[i] << 8) | m_key_[i + 1]); + key.append((char)((m_key_[i] << 8) | m_key_[i + 1])); i += 2; } if (m_key_[i] != 0) { - key.append(m_key_[i] << 8); + key.append((char)(m_key_[i] << 8)); } - m_hashCode_ = key.hashCode(); + m_hashCode_ = key.toString().hashCode(); } return m_hashCode_; } diff --git a/icu4j/src/com/ibm/icu/text/Collator.java b/icu4j/src/com/ibm/icu/text/Collator.java index 993f0127c29..53c78e47379 100755 --- a/icu4j/src/com/ibm/icu/text/Collator.java +++ b/icu4j/src/com/ibm/icu/text/Collator.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Collator.java,v $ -* $Date: 2002/05/14 16:48:49 $ -* $Revision: 1.4 $ +* $Date: 2002/05/16 20:04:49 $ +* $Revision: 1.5 $ * ******************************************************************************* */ @@ -201,12 +201,13 @@ public abstract class Collator * @see #PRIMARY * @see #SECONDARY * @see #TERTIARY + * @see #QUATERNARY * @see 
#IDENTICAL - * @exception IllegalArgumentException If the new strength value is not one of - * PRIMARY, SECONDARY, TERTIARY or IDENTICAL. + * @exception IllegalArgumentException If the new strength value is not one + * of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL. * @draft 2.2 */ - public synchronized void setStrength(int newStrength) { + public void setStrength(int newStrength) { if ((newStrength != PRIMARY) && (newStrength != SECONDARY) && (newStrength != TERTIARY) && @@ -229,18 +230,13 @@ public abstract class Collator * mode. * @draft 2.2 */ - public synchronized void setDecomposition(int decomposition) { + public void setDecomposition(int decomposition) { if ((decomposition != NO_DECOMPOSITION) && (decomposition != CANONICAL_DECOMPOSITION) && (decomposition != FULL_DECOMPOSITION)) { throw new IllegalArgumentException("Wrong decomposition mode."); } - if (decomposition != NO_DECOMPOSITION) { - m_decomposition_ = decomposition; - } - else { - m_decomposition_ = CANONICAL_DECOMPOSITION; - } + m_decomposition_ = CANONICAL_DECOMPOSITION; } // public getters -------------------------------------------------------- diff --git a/icu4j/src/com/ibm/icu/text/CollatorReader.java b/icu4j/src/com/ibm/icu/text/CollatorReader.java index 110ba7bffcd..7b6f4b0f2a9 100644 --- a/icu4j/src/com/ibm/icu/text/CollatorReader.java +++ b/icu4j/src/com/ibm/icu/text/CollatorReader.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollatorReader.java,v $ -* $Date: 2002/05/14 16:48:49 $ -* $Revision: 1.1 $ +* $Date: 2002/05/16 20:04:49 $ +* $Revision: 1.2 $ * ******************************************************************************* */ @@ -38,7 +38,7 @@ final class CollatorReader /** *

Protected constructor.

- * @param inputStream ICU uprop.dat file input stream + * @param inputStream ICU collator file input stream * @exception IOException throw if data file fails authentication * @draft 2.1 */ diff --git a/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java b/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java index d2737979a49..8b799a915e4 100755 --- a/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java +++ b/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java,v $ -* $Date: 2002/05/14 16:48:49 $ -* $Revision: 1.4 $ +* $Date: 2002/05/16 20:04:49 $ +* $Revision: 1.5 $ * ******************************************************************************* */ @@ -17,16 +17,15 @@ import java.io.DataInputStream; import java.io.BufferedInputStream; import java.io.IOException; import java.io.ByteArrayInputStream; -import java.nio.IntBuffer; import java.util.Locale; import java.util.ResourceBundle; import java.util.MissingResourceException; import java.text.CharacterIterator; +import java.text.StringCharacterIterator; import com.ibm.icu.impl.IntTrie; import com.ibm.icu.impl.Trie; import com.ibm.icu.impl.NormalizerImpl; import com.ibm.icu.impl.ICULocaleData; -import com.ibm.icu.impl.UCharacterIterator; /** *

The RuleBasedCollator class is a concrete subclass of Collator that @@ -282,7 +281,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate setDecomposition(Collator.CANONICAL_DECOMPOSITION); m_rules_ = rules; // tables = new RBCollationTables(rules, decomp); - // init(); + init(); } // public methods -------------------------------------------------------- @@ -314,7 +313,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * otherwise * @draft 2.2 */ - public synchronized void setHiraganaQuartenary(boolean flag) + public void setHiraganaQuartenary(boolean flag) { m_isHiragana4_ = flag; } @@ -324,7 +323,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * Collator's locale specific default value. * @draft 2.2 */ - public synchronized void setHiraganaQuartenaryDefault() + public void setHiraganaQuartenaryDefault() { m_isHiragana4_ = m_defaultIsHiragana4_; } @@ -336,7 +335,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * uppercased characters * @draft 2.2 */ - public synchronized void setCaseFirst(boolean upper) + public void setCaseFirst(boolean upper) { if (upper) { m_caseFirst_ = AttributeValue.UPPER_FIRST_; @@ -352,7 +351,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * Ignores case preferences. 
* @draft 2.2 */ - public synchronized void setCaseFirstOff() + public void setCaseFirstOff() { m_caseFirst_ = AttributeValue.OFF_; updateInternalState(); @@ -365,7 +364,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * @see #setCaseFirstOff * @draft 2.2 */ - public synchronized final void setCaseFirstDefault() + public final void setCaseFirstDefault() { m_caseFirst_ = m_defaultCaseFirst_; updateInternalState(); @@ -377,9 +376,10 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * @see #setAlternateHandling * @draft 2.2 */ - public synchronized void setAlternateHandlingDefault() + public void setAlternateHandlingDefault() { m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_; + updateInternalState(); } /** @@ -387,7 +387,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * @see #setCaseLevel * @draft 2.2 */ - public synchronized void setCaseLevelDefault() + public void setCaseLevelDefault() { m_isCaseLevel_ = m_defaultIsCaseLevel_; updateInternalState(); @@ -399,7 +399,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * @see #getDecomposition * @draft 2.2 */ - public synchronized void setDecompositionDefault() + public void setDecompositionDefault() { m_decomposition_ = m_defaultDecomposition_; } @@ -409,7 +409,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * @see #getFrenchCollation * @draft 2.2 */ - public synchronized void setFrenchCollationDefault() + public void setFrenchCollationDefault() { m_isFrenchCollation_ = m_defaultIsFrenchCollation_; updateInternalState(); @@ -420,7 +420,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * @see #setStrength * @draft 2.2 */ - public synchronized void setStrengthDefault() + public void setStrengthDefault() { m_strength_ = m_defaultStrength_; updateInternalState(); @@ -431,7 +431,7 @@ public class 
RuleBasedCollator extends Collator implements Trie.DataManipulate * @param flag true to set the French collation on, false to set it off * @draft 2.2 */ - public synchronized void setFrenchCollation(boolean flag) + public void setFrenchCollation(boolean flag) { m_isFrenchCollation_ = flag; updateInternalState(); @@ -445,7 +445,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * for the non-ignorable. * @draft 2.2 */ - public synchronized void setAlternateHandling(boolean shifted) + public void setAlternateHandling(boolean shifted) { m_isAlternateHandlingShifted_ = shifted; updateInternalState(); @@ -456,12 +456,39 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * @param flag true if case level sorting is required, false otherwise * @draft 2.2 */ - public synchronized void setCaseLevel(boolean flag) + public void setCaseLevel(boolean flag) { m_isCaseLevel_ = flag; updateInternalState(); } + /** + *

Sets this Collator's strength property. The strength property + * determines the minimum level of difference considered significant + * during comparison.

+ *

See the Collator class description for an example of use.

+ * @param newStrength the new strength value. + * @see #getStrength + * @see #PRIMARY + * @see #SECONDARY + * @see #TERTIARY + * @see #QUATERNARY + * @see #IDENTICAL + * @exception IllegalArgumentException If the new strength value is not one + * of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL. + * @draft 2.2 + */ + public void setStrength(int newStrength) { + if ((newStrength != PRIMARY) && + (newStrength != SECONDARY) && + (newStrength != TERTIARY) && + (newStrength != QUATERNARY) && + (newStrength != IDENTICAL)) { + throw new IllegalArgumentException("Incorrect comparison level."); + } + m_strength_ = newStrength; + updateInternalState(); + } // public getters -------------------------------------------------------- @@ -508,6 +535,9 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate */ public CollationKey getCollationKey(String source) { + if (source == null) { + return null; + } boolean compare[] = {m_isCaseLevel_, true, m_strength_ >= SECONDARY, @@ -705,14 +735,14 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate // Find the length of any leading portion that is equal int offset = getFirstUnmatchedOffset(source, target); - if (source.charAt(offset) == 0) { - if (target.charAt(offset) == 0) { + if (offset == source.length()) { + if (offset == target.length()) { return 0; } - return 1; + return -1; } - else if (target.charAt(offset) == 0) { - return -1; + else if (target.length() == offset) { + return 1; } // setting up the collator parameters @@ -1168,14 +1198,6 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate m_contractionEnd_ = UCA_.m_contractionEnd_; m_minUnsafe_ = UCA_.m_minUnsafe_; m_minContractionEnd_ = UCA_.m_minContractionEnd_; - setStrengthDefault(); - setDecompositionDefault(); - setFrenchCollationDefault(); - setAlternateHandlingDefault(); - setCaseLevelDefault(); - setCaseFirstDefault(); - setHiraganaQuartenaryDefault(); - updateInternalState(); } Object 
rules = rb.getObject("CollationElements"); if (rules != null) { @@ -1204,7 +1226,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate /** * Initializes the RuleBasedCollator */ - protected synchronized final void init() + protected final void init() { for (m_minUnsafe_ = 0; m_minUnsafe_ < DEFAULT_MIN_HEURISTIC_; m_minUnsafe_ ++) { @@ -1222,13 +1244,13 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate break; } } - setStrengthDefault(); - setDecompositionDefault(); - setFrenchCollationDefault(); - setAlternateHandlingDefault(); - setCaseLevelDefault(); - setCaseFirstDefault(); - setHiraganaQuartenaryDefault(); + m_strength_ = m_defaultStrength_; + m_decomposition_ = m_defaultDecomposition_; + m_isFrenchCollation_ = m_defaultIsFrenchCollation_; + m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_; + m_isCaseLevel_ = m_defaultIsCaseLevel_; + m_caseFirst_ = m_defaultCaseFirst_; + m_isHiragana4_ = m_defaultIsHiragana4_; updateInternalState(); } @@ -1287,7 +1309,7 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate /** * Resets the internal case data members and compression values. 
*/ - protected synchronized void updateInternalState() + protected void updateInternalState() { if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) { m_caseSwitch_ = CASE_SWITCH_; @@ -1354,6 +1376,9 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate m_defaultIsCaseLevel_ = (value == AttributeValue.ON_); break; case Attribute.NORMALIZATION_MODE_: + if (value == AttributeValue.ON_) { + value = Collator.CANONICAL_DECOMPOSITION; + } m_defaultDecomposition_ = value; break; case Attribute.STRENGTH_: @@ -1946,15 +1971,11 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate * @param commonBottom4 smallest common quaternary byte * @param bottomCount4 smallest quaternary byte */ - private synchronized final void getSortKeyBytes(String source, - boolean compare[], - byte bytes[][], - int bytescount[], - int count[], - boolean doFrench, - byte hiragana4, - int commonBottom4, - int bottomCount4) + private final void getSortKeyBytes(String source, boolean compare[], + byte bytes[][], int bytescount[], + int count[], boolean doFrench, + byte hiragana4, int commonBottom4, + int bottomCount4) { int backupDecomposition = m_decomposition_; m_decomposition_ = NO_DECOMPOSITION; // have to revert to backup later @@ -2305,18 +2326,23 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate private final int getFirstUnmatchedOffset(String source, String target) { int result = 0; - while (source.charAt(result) == target.charAt(result) + int minlength = source.length(); + if (minlength > target.length()) { + minlength = target.length(); + } + while (result < minlength + && source.charAt(result) == target.charAt(result) && source.charAt(result) != 0) { result ++; } - if (result > 0) { + if (result > 0 && result < minlength) { // There is an identical portion at the beginning of the two // strings. 
If the identical portion ends within a contraction or a // combining character sequence, back up to the start of that // sequence. char schar = source.charAt(result); // first differing chars char tchar = target.charAt(result); - if (schar != 0 && isUnsafe(schar) || tchar != 0 && isUnsafe(tchar)) + if (isUnsafe(schar) || isUnsafe(tchar)) { // We are stopped in the middle of a contraction or combining // sequence. @@ -2399,12 +2425,16 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate int cebuffersize[]) { // Preparing the context objects for iterating over strings - UCharacterIterator siter = new UCharacterIterator(source, textoffset, - source.length()); + StringCharacterIterator siter = new StringCharacterIterator(source, + textoffset, + source.length(), + textoffset); CollationElementIterator scoleiter = new CollationElementIterator( siter, this); - UCharacterIterator titer = new UCharacterIterator(target, textoffset, - target.length()); + StringCharacterIterator titer = new StringCharacterIterator(target, + textoffset, + target.length(), + textoffset); CollationElementIterator tcoleiter = new CollationElementIterator( titer, this); @@ -2610,19 +2640,20 @@ public class RuleBasedCollator extends Collator implements Trie.DataManipulate { // now, we're gonna reexamine collected CEs if (!doFrench) { // normal - int offset = 0; + int soffset = 0; + int toffset = 0; while (true) { int sorder = CollationElementIterator.IGNORABLE; while (sorder == CollationElementIterator.IGNORABLE) { - sorder = cebuffer[0][offset ++] & CE_SECONDARY_MASK_; + sorder = cebuffer[0][soffset ++] & CE_SECONDARY_MASK_; } int torder = CollationElementIterator.IGNORABLE; while (torder == CollationElementIterator.IGNORABLE) { - torder = cebuffer[1][offset ++] & CE_SECONDARY_MASK_; + torder = cebuffer[1][toffset ++] & CE_SECONDARY_MASK_; } if (sorder == torder) { - if (cebuffer[0][offset - 1] + if (cebuffer[0][soffset - 1] == CollationElementIterator.NULLORDER) { 
break; }