ICU-7264 fix weight allocation and align sort key constants with C++

X-SVN-Rev: 28908
This commit is contained in:
Markus Scherer 2010-10-25 23:02:08 +00:00
parent d65b6532f8
commit f6fb60ff6f
4 changed files with 48 additions and 67 deletions

View file

@ -727,17 +727,7 @@ final class CollationParsedRuleBuilder {
* @return 0 if equals, 1 if this is > target, -1 otherwise
*/
public int compareTo(WeightRange target) {
if (this == target) {
return 0;
}
int tstart = target.m_start_;
if (m_start_ == tstart) {
return 0;
}
if (m_start_ > tstart) {
return 1;
}
return -1;
return Utility.compareUnsigned(m_start_, target.m_start_);
}
/**
@ -1584,9 +1574,12 @@ final class CollationParsedRuleBuilder {
}
}
}
if (low == 0) {
low = 0x01000000;
if(0 <= low && low < 0x02000000) { // unsigned comparison < 0x02000000
// We must not use CE weight byte 02, so we set it as the minimum lower bound.
// See http://site.icu-project.org/design/collation/bytes
low = 0x02000000;
}
if (strength == Collator.SECONDARY) { // similar as simple
if (Utility.compareUnsigned(low,
RuleBasedCollator.COMMON_BOTTOM_2_ << 24) >= 0

View file

@ -1697,32 +1697,14 @@ public final class RuleBasedCollator extends Collator
* Implicit generator
*/
static final ImplicitCEGenerator impCEGen_;
// /**
// * Implicit constants
// */
// static final int IMPLICIT_BASE_BYTE_;
// static final int IMPLICIT_LIMIT_BYTE_;
// static final int IMPLICIT_4BYTE_BOUNDARY_;
// static final int LAST_MULTIPLIER_;
// static final int LAST2_MULTIPLIER_;
// static final int IMPLICIT_BASE_3BYTE_;
// static final int IMPLICIT_BASE_4BYTE_;
// static final int BYTES_TO_AVOID_ = 3;
// static final int OTHER_COUNT_ = 256 - BYTES_TO_AVOID_;
// static final int LAST_COUNT_ = OTHER_COUNT_ / 2;
// /**
// * Room for intervening, without expanding to 5 bytes
// */
// static final int LAST_COUNT2_ = OTHER_COUNT_ / 21;
// static final int IMPLICIT_3BYTE_COUNT_ = 1;
//
static final byte SORT_LEVEL_TERMINATOR_ = 1;
// These are values from UCA required for
// implicit generation and supressing sort key compression
// they should regularly be in the UCA, but if one
// is running without UCA, it could be a problem
static final int maxRegularPrimary = 0xA0;
static final int maxRegularPrimary = 0x7A;
static final int minImplicitPrimary = 0xE0;
static final int maxImplicitPrimary = 0xE4;
@ -2108,9 +2090,10 @@ public final class RuleBasedCollator extends Collator
private static final byte BYTE_SHIFT_PREFIX_ = (byte)0x03;
/*private*/ static final byte BYTE_UNSHIFTED_MIN_ = BYTE_SHIFT_PREFIX_;
//private static final byte BYTE_FIRST_UCA_ = BYTE_COMMON_;
static final byte CODAN_PLACEHOLDER = 0x27;
//private static final byte BYTE_LAST_LATIN_PRIMARY_ = (byte)0x4C;
private static final byte BYTE_FIRST_NON_LATIN_PRIMARY_ = (byte)0x4D;
// TODO: Make the following values dynamic since they change with almost every UCA version.
static final byte CODAN_PLACEHOLDER = 0x12;
private static final byte BYTE_FIRST_NON_LATIN_PRIMARY_ = (byte)0x5B;
private static final byte BYTE_UNSHIFTED_MAX_ = (byte)0xFF;
private static final int TOTAL_2_ = COMMON_TOP_2_ - COMMON_BOTTOM_2_ - 1;
private static final int FLAG_BIT_MASK_CASE_SWITCH_OFF_ = 0x80;
@ -2341,6 +2324,14 @@ public final class RuleBasedCollator extends Collator
return 0;
}
// Is this primary weight compressible?
// Returns false for multi-lead-byte scripts (digits, Latin, Han, implicit).
// TODO: This should use per-lead-byte flags from FractionalUCA.txt.
static boolean
isCompressible(int primary1) {
return BYTE_FIRST_NON_LATIN_PRIMARY_ <= primary1 && primary1 <= maxRegularPrimary;
}
/**
* Gets the 2 bytes of primary order and adds it to the primary byte array
* @param ce current ce
@ -2417,33 +2408,27 @@ public final class RuleBasedCollator extends Collator
m_utilBytesCount1_ ++;
leadPrimary = 0;
}
else if (p1 < BYTE_FIRST_NON_LATIN_PRIMARY_
|| (p1 > maxRegularPrimary
//> (RuleBasedCollator.UCA_CONSTANTS_.LAST_NON_VARIABLE_[0]
// >>> 24)
&& p1 < minImplicitPrimary
//< (RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_[0]
// >>> 24)
)) {
// not compressible
leadPrimary = 0;
m_utilBytes1_ = append(m_utilBytes1_,
m_utilBytesCount1_,
(byte)p1);
m_utilBytesCount1_ ++;
m_utilBytes1_ = append(m_utilBytes1_,
m_utilBytesCount1_,
(byte)p2);
m_utilBytesCount1_ ++;
}
else { // compress
else if (isCompressible(p1)) {
// compress
leadPrimary = p1;
m_utilBytes1_ = append(m_utilBytes1_,
m_utilBytesCount1_,
(byte)p1);
m_utilBytesCount1_ ++;
m_utilBytes1_ = append(m_utilBytes1_,
m_utilBytesCount1_, (byte)p2);
m_utilBytesCount1_,
(byte)p2);
m_utilBytesCount1_ ++;
}
else {
leadPrimary = 0;
m_utilBytes1_ = append(m_utilBytes1_,
m_utilBytesCount1_,
(byte)p1);
m_utilBytesCount1_ ++;
m_utilBytes1_ = append(m_utilBytes1_,
m_utilBytesCount1_,
(byte)p2);
m_utilBytesCount1_ ++;
}
}

View file

@ -53,17 +53,18 @@ public class CollationCreationMethodTest extends TestFmwk
for(z = 0; z < 60; z++)
{
x = r.nextInt(locales.length);
Locale locale = locales[x];
try
{
//this is making the assumption that the only type of collator that will be made is RBC
localeCollator = (RuleBasedCollator)Collator.getInstance(locales[x]);
localeCollator = (RuleBasedCollator)Collator.getInstance(locale);
logln("Rules for " + locale + " are: " + localeCollator.getRules());
ruleCollator = new RuleBasedCollator(localeCollator.getRules());
logln("Rules are: " + localeCollator.getRules());
}
catch (Exception e)
{
warnln("ERROR: in creation of collator of " + locales[x].getDisplayName() + " locale");
warnln("ERROR: in creation of collator of locale " + locale.getDisplayName() + ": " + e);
return;
}
@ -77,7 +78,7 @@ public class CollationCreationMethodTest extends TestFmwk
key1 = localeCollator.getCollationKey(randString1);
key2 = ruleCollator.getCollationKey(randString1);
report(locales[x].getDisplayName(), randString1, key1, key2);
report(locale.getDisplayName(), randString1, key1, key2);
}
}
}
@ -112,11 +113,12 @@ public class CollationCreationMethodTest extends TestFmwk
{
if (!k1.equals(k2))
{
String msg = "";
msg += "With " + localeName + "Collator: ";
msg += string1;
msg += " failed to produce identical keys on both collators";
errln(msg);
StringBuilder msg = new StringBuilder();
msg.append("With ").append(localeName).append(" collator\n and input string: ").append(string1).append('\n');
msg.append(" failed to produce identical keys on both collators\n");
msg.append(" localeCollator key: ").append(CollationMiscTest.prettify(k1)).append('\n');
msg.append(" ruleCollator key: ").append(CollationMiscTest.prettify(k2)).append('\n');
errln(msg.toString());
}
}
}

View file

@ -213,6 +213,7 @@ public class CollationMiscTest extends TestFmwk {
};
for (int i = 0; i< rules.length; i++) {
logln(String.format("rules[%d] = \"%s\"", i, rules[i]));
genericRulesStarter(rules[i], data[i]);
}
}
@ -321,7 +322,7 @@ public class CollationMiscTest extends TestFmwk {
return target;
}
String prettify(CollationKey sourceKey) {
static String prettify(CollationKey sourceKey) {
int i;
byte[] bytes= sourceKey.toByteArray();
StringBuilder target = new StringBuilder("[");