mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 01:11:02 +00:00
ICU-7264 fix weight allocation and align sort key constants with C++
X-SVN-Rev: 28908
This commit is contained in:
parent
d65b6532f8
commit
f6fb60ff6f
4 changed files with 48 additions and 67 deletions
|
@ -727,17 +727,7 @@ final class CollationParsedRuleBuilder {
|
|||
* @return 0 if equals, 1 if this is > target, -1 otherwise
|
||||
*/
|
||||
public int compareTo(WeightRange target) {
|
||||
if (this == target) {
|
||||
return 0;
|
||||
}
|
||||
int tstart = target.m_start_;
|
||||
if (m_start_ == tstart) {
|
||||
return 0;
|
||||
}
|
||||
if (m_start_ > tstart) {
|
||||
return 1;
|
||||
}
|
||||
return -1;
|
||||
return Utility.compareUnsigned(m_start_, target.m_start_);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1584,9 +1574,12 @@ final class CollationParsedRuleBuilder {
|
|||
}
|
||||
}
|
||||
}
|
||||
if (low == 0) {
|
||||
low = 0x01000000;
|
||||
if(0 <= low && low < 0x02000000) { // unsigned comparison < 0x02000000
|
||||
// We must not use CE weight byte 02, so we set it as the minimum lower bound.
|
||||
// See http://site.icu-project.org/design/collation/bytes
|
||||
low = 0x02000000;
|
||||
}
|
||||
|
||||
if (strength == Collator.SECONDARY) { // similar as simple
|
||||
if (Utility.compareUnsigned(low,
|
||||
RuleBasedCollator.COMMON_BOTTOM_2_ << 24) >= 0
|
||||
|
|
|
@ -1697,32 +1697,14 @@ public final class RuleBasedCollator extends Collator
|
|||
* Implicit generator
|
||||
*/
|
||||
static final ImplicitCEGenerator impCEGen_;
|
||||
// /**
|
||||
// * Implicit constants
|
||||
// */
|
||||
// static final int IMPLICIT_BASE_BYTE_;
|
||||
// static final int IMPLICIT_LIMIT_BYTE_;
|
||||
// static final int IMPLICIT_4BYTE_BOUNDARY_;
|
||||
// static final int LAST_MULTIPLIER_;
|
||||
// static final int LAST2_MULTIPLIER_;
|
||||
// static final int IMPLICIT_BASE_3BYTE_;
|
||||
// static final int IMPLICIT_BASE_4BYTE_;
|
||||
// static final int BYTES_TO_AVOID_ = 3;
|
||||
// static final int OTHER_COUNT_ = 256 - BYTES_TO_AVOID_;
|
||||
// static final int LAST_COUNT_ = OTHER_COUNT_ / 2;
|
||||
// /**
|
||||
// * Room for intervening, without expanding to 5 bytes
|
||||
// */
|
||||
// static final int LAST_COUNT2_ = OTHER_COUNT_ / 21;
|
||||
// static final int IMPLICIT_3BYTE_COUNT_ = 1;
|
||||
//
|
||||
|
||||
static final byte SORT_LEVEL_TERMINATOR_ = 1;
|
||||
|
||||
// These are values from UCA required for
|
||||
// implicit generation and supressing sort key compression
|
||||
// they should regularly be in the UCA, but if one
|
||||
// is running without UCA, it could be a problem
|
||||
static final int maxRegularPrimary = 0xA0;
|
||||
static final int maxRegularPrimary = 0x7A;
|
||||
static final int minImplicitPrimary = 0xE0;
|
||||
static final int maxImplicitPrimary = 0xE4;
|
||||
|
||||
|
@ -2108,9 +2090,10 @@ public final class RuleBasedCollator extends Collator
|
|||
private static final byte BYTE_SHIFT_PREFIX_ = (byte)0x03;
|
||||
/*private*/ static final byte BYTE_UNSHIFTED_MIN_ = BYTE_SHIFT_PREFIX_;
|
||||
//private static final byte BYTE_FIRST_UCA_ = BYTE_COMMON_;
|
||||
static final byte CODAN_PLACEHOLDER = 0x27;
|
||||
//private static final byte BYTE_LAST_LATIN_PRIMARY_ = (byte)0x4C;
|
||||
private static final byte BYTE_FIRST_NON_LATIN_PRIMARY_ = (byte)0x4D;
|
||||
// TODO: Make the following values dynamic since they change with almost every UCA version.
|
||||
static final byte CODAN_PLACEHOLDER = 0x12;
|
||||
private static final byte BYTE_FIRST_NON_LATIN_PRIMARY_ = (byte)0x5B;
|
||||
|
||||
private static final byte BYTE_UNSHIFTED_MAX_ = (byte)0xFF;
|
||||
private static final int TOTAL_2_ = COMMON_TOP_2_ - COMMON_BOTTOM_2_ - 1;
|
||||
private static final int FLAG_BIT_MASK_CASE_SWITCH_OFF_ = 0x80;
|
||||
|
@ -2341,6 +2324,14 @@ public final class RuleBasedCollator extends Collator
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Is this primary weight compressible?
|
||||
// Returns false for multi-lead-byte scripts (digits, Latin, Han, implicit).
|
||||
// TODO: This should use per-lead-byte flags from FractionalUCA.txt.
|
||||
static boolean
|
||||
isCompressible(int primary1) {
|
||||
return BYTE_FIRST_NON_LATIN_PRIMARY_ <= primary1 && primary1 <= maxRegularPrimary;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the 2 bytes of primary order and adds it to the primary byte array
|
||||
* @param ce current ce
|
||||
|
@ -2417,33 +2408,27 @@ public final class RuleBasedCollator extends Collator
|
|||
m_utilBytesCount1_ ++;
|
||||
leadPrimary = 0;
|
||||
}
|
||||
else if (p1 < BYTE_FIRST_NON_LATIN_PRIMARY_
|
||||
|| (p1 > maxRegularPrimary
|
||||
//> (RuleBasedCollator.UCA_CONSTANTS_.LAST_NON_VARIABLE_[0]
|
||||
// >>> 24)
|
||||
&& p1 < minImplicitPrimary
|
||||
//< (RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_[0]
|
||||
// >>> 24)
|
||||
)) {
|
||||
// not compressible
|
||||
leadPrimary = 0;
|
||||
m_utilBytes1_ = append(m_utilBytes1_,
|
||||
m_utilBytesCount1_,
|
||||
(byte)p1);
|
||||
m_utilBytesCount1_ ++;
|
||||
m_utilBytes1_ = append(m_utilBytes1_,
|
||||
m_utilBytesCount1_,
|
||||
(byte)p2);
|
||||
m_utilBytesCount1_ ++;
|
||||
}
|
||||
else { // compress
|
||||
else if (isCompressible(p1)) {
|
||||
// compress
|
||||
leadPrimary = p1;
|
||||
m_utilBytes1_ = append(m_utilBytes1_,
|
||||
m_utilBytesCount1_,
|
||||
(byte)p1);
|
||||
m_utilBytesCount1_ ++;
|
||||
m_utilBytes1_ = append(m_utilBytes1_,
|
||||
m_utilBytesCount1_, (byte)p2);
|
||||
m_utilBytesCount1_,
|
||||
(byte)p2);
|
||||
m_utilBytesCount1_ ++;
|
||||
}
|
||||
else {
|
||||
leadPrimary = 0;
|
||||
m_utilBytes1_ = append(m_utilBytes1_,
|
||||
m_utilBytesCount1_,
|
||||
(byte)p1);
|
||||
m_utilBytesCount1_ ++;
|
||||
m_utilBytes1_ = append(m_utilBytes1_,
|
||||
m_utilBytesCount1_,
|
||||
(byte)p2);
|
||||
m_utilBytesCount1_ ++;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,17 +53,18 @@ public class CollationCreationMethodTest extends TestFmwk
|
|||
for(z = 0; z < 60; z++)
|
||||
{
|
||||
x = r.nextInt(locales.length);
|
||||
Locale locale = locales[x];
|
||||
|
||||
try
|
||||
{
|
||||
//this is making the assumption that the only type of collator that will be made is RBC
|
||||
localeCollator = (RuleBasedCollator)Collator.getInstance(locales[x]);
|
||||
localeCollator = (RuleBasedCollator)Collator.getInstance(locale);
|
||||
logln("Rules for " + locale + " are: " + localeCollator.getRules());
|
||||
ruleCollator = new RuleBasedCollator(localeCollator.getRules());
|
||||
logln("Rules are: " + localeCollator.getRules());
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
warnln("ERROR: in creation of collator of " + locales[x].getDisplayName() + " locale");
|
||||
warnln("ERROR: in creation of collator of locale " + locale.getDisplayName() + ": " + e);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -77,7 +78,7 @@ public class CollationCreationMethodTest extends TestFmwk
|
|||
key1 = localeCollator.getCollationKey(randString1);
|
||||
key2 = ruleCollator.getCollationKey(randString1);
|
||||
|
||||
report(locales[x].getDisplayName(), randString1, key1, key2);
|
||||
report(locale.getDisplayName(), randString1, key1, key2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -112,11 +113,12 @@ public class CollationCreationMethodTest extends TestFmwk
|
|||
{
|
||||
if (!k1.equals(k2))
|
||||
{
|
||||
String msg = "";
|
||||
msg += "With " + localeName + "Collator: ";
|
||||
msg += string1;
|
||||
msg += " failed to produce identical keys on both collators";
|
||||
errln(msg);
|
||||
StringBuilder msg = new StringBuilder();
|
||||
msg.append("With ").append(localeName).append(" collator\n and input string: ").append(string1).append('\n');
|
||||
msg.append(" failed to produce identical keys on both collators\n");
|
||||
msg.append(" localeCollator key: ").append(CollationMiscTest.prettify(k1)).append('\n');
|
||||
msg.append(" ruleCollator key: ").append(CollationMiscTest.prettify(k2)).append('\n');
|
||||
errln(msg.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -213,6 +213,7 @@ public class CollationMiscTest extends TestFmwk {
|
|||
};
|
||||
|
||||
for (int i = 0; i< rules.length; i++) {
|
||||
logln(String.format("rules[%d] = \"%s\"", i, rules[i]));
|
||||
genericRulesStarter(rules[i], data[i]);
|
||||
}
|
||||
}
|
||||
|
@ -321,7 +322,7 @@ public class CollationMiscTest extends TestFmwk {
|
|||
return target;
|
||||
}
|
||||
|
||||
String prettify(CollationKey sourceKey) {
|
||||
static String prettify(CollationKey sourceKey) {
|
||||
int i;
|
||||
byte[] bytes= sourceKey.toByteArray();
|
||||
StringBuilder target = new StringBuilder("[");
|
||||
|
|
Loading…
Add table
Reference in a new issue