mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-22517 Limit the closure expansion loop and return error
To avoid a very slow return from the constructor, we return an error when the Collation rule's closure expands too much. Add a soft limit equal to the number of loop iterations needed for 8 Hangul syllables. Necessary number of loop iterations: H(0) = 0; H(i) = 3H(i-1) + 2, where i is the number of Hangul syllables in the rule. H(1) = 2, H(2) = 8, H(3) = 26, H(4) = 80, H(5) = 242, ..., H(8) = 6560.
This commit is contained in:
parent
f6d09d514d
commit
05b0e7abaf
5 changed files with 68 additions and 0 deletions
|
@ -1113,12 +1113,23 @@ CollationBuilder::addWithClosure(const UnicodeString &nfdPrefix, const UnicodeSt
|
|||
return ce32;
|
||||
}
|
||||
|
||||
// ICU-22517
|
||||
// This constant defines a limit for the addOnlyClosure to return
|
||||
// error, to avoid taking a long time for canonical closure expansion.
|
||||
// Please let us know if you have a reasonable use case
|
||||
// for a practical Collation rule that needs to increase this limit.
|
||||
// This value is needed for compiling a rule with eight Hangul syllables such as
|
||||
// "&a=b쫊쫊쫊쫊쫊쫊쫊쫊" without error, which should be more than realistic
|
||||
// usage.
|
||||
static constexpr int32_t kClosureLoopLimit = 6560;
|
||||
|
||||
uint32_t
|
||||
CollationBuilder::addOnlyClosure(const UnicodeString &nfdPrefix, const UnicodeString &nfdString,
|
||||
const int64_t newCEs[], int32_t newCEsLength, uint32_t ce32,
|
||||
UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) { return ce32; }
|
||||
|
||||
int32_t loop = 0;
|
||||
// Map from canonically equivalent input to the CEs. (But not from the all-NFD input.)
|
||||
if(nfdPrefix.isEmpty()) {
|
||||
CanonicalIterator stringIter(nfdString, errorCode);
|
||||
|
@ -1128,6 +1139,11 @@ CollationBuilder::addOnlyClosure(const UnicodeString &nfdPrefix, const UnicodeSt
|
|||
UnicodeString str = stringIter.next();
|
||||
if(str.isBogus()) { break; }
|
||||
if(ignoreString(str, errorCode) || str == nfdString) { continue; }
|
||||
if (loop++ > kClosureLoopLimit) {
|
||||
// To avoid hang as in ICU-22517, return with error.
|
||||
errorCode = U_INPUT_TOO_LONG_ERROR;
|
||||
return ce32;
|
||||
}
|
||||
ce32 = addIfDifferent(prefix, str, newCEs, newCEsLength, ce32, errorCode);
|
||||
if(U_FAILURE(errorCode)) { return ce32; }
|
||||
}
|
||||
|
@ -1144,6 +1160,11 @@ CollationBuilder::addOnlyClosure(const UnicodeString &nfdPrefix, const UnicodeSt
|
|||
UnicodeString str = stringIter.next();
|
||||
if(str.isBogus()) { break; }
|
||||
if(ignoreString(str, errorCode) || (samePrefix && str == nfdString)) { continue; }
|
||||
if (loop++ > kClosureLoopLimit) {
|
||||
// To avoid hang as in ICU-22517, return with error.
|
||||
errorCode = U_INPUT_TOO_LONG_ERROR;
|
||||
return ce32;
|
||||
}
|
||||
ce32 = addIfDifferent(prefix, str, newCEs, newCEsLength, ce32, errorCode);
|
||||
if(U_FAILURE(errorCode)) { return ce32; }
|
||||
}
|
||||
|
|
|
@ -1249,6 +1249,18 @@ void CollationRegressionTest::TestBeforeWithTooStrongAfter() {
|
|||
}
|
||||
}
|
||||
|
||||
// Regression test for ICU-22517.
// Constructs a RuleBasedCollator from successively longer prefixes of a rule
// that ends in eight Hangul syllables. The resulting status is never
// inspected in this function, so the test presumably only verifies that each
// constructor call returns (NOTE(review): confirm that is the intent).
void CollationRegressionTest::TestICU22517() {
|
||||
// NOTE(review): errorCode is declared but not otherwise used in this function.
IcuTestErrorCode errorCode(*this, "TestICU22517");
|
||||
// "&a=b" (4 code units) followed by eight Hangul syllables (1 code unit each).
char16_t data[] = u"&a=b쫊쫊쫊쫊쫊쫊쫊쫊";
|
||||
icu::UnicodeString rule(true, data, -1);
|
||||
// In quick mode the two longest prefixes are skipped. `quick` is declared
// outside this function.
int length = quick ? rule.length()-2 : rule.length();
|
||||
// Start at 4 so the first prefix is "&a=b" with no Hangul syllable; each
// further iteration appends one more syllable to the rule.
for (int i = 4; i <= length; i++) {
|
||||
// Fresh status per iteration; a failure in one prefix does not affect the next.
UErrorCode status = U_ZERO_ERROR;
|
||||
// LocalPointer owns the collator and releases it at the end of the iteration.
icu::LocalPointer<icu::RuleBasedCollator> col1(
|
||||
new icu::RuleBasedCollator(rule.tempSubString(0, i), status));
|
||||
}
|
||||
}
|
||||
|
||||
void CollationRegressionTest::TestICU22277() {
|
||||
IcuTestErrorCode errorCode(*this, "TestICU22277");
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
@ -1408,6 +1420,7 @@ void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const ch
|
|||
TESTCASE_AUTO(TestTrailingComment);
|
||||
TESTCASE_AUTO(TestBeforeWithTooStrongAfter);
|
||||
TESTCASE_AUTO(TestICU22277);
|
||||
TESTCASE_AUTO(TestICU22517);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
|
|
@ -240,6 +240,8 @@ public:
|
|||
// Test use-of-uninitialized-value
|
||||
void TestICU22277();
|
||||
|
||||
void TestICU22517();
|
||||
|
||||
private:
|
||||
//------------------------------------------------------------------------
|
||||
// Internal utilities
|
||||
|
|
|
@ -24,6 +24,7 @@ import com.ibm.icu.text.Collator;
|
|||
import com.ibm.icu.text.Normalizer2;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.UnicodeSetIterator;
|
||||
import com.ibm.icu.util.ICUInputTooLongException;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
public final class CollationBuilder extends CollationRuleParser.Sink {
|
||||
|
@ -862,10 +863,21 @@ public final class CollationBuilder extends CollationRuleParser.Sink {
|
|||
return ce32;
|
||||
}
|
||||
|
||||
// ICU-22517
|
||||
// This constant defines a limit for the addOnlyClosure to return
|
||||
// error, to avoid taking a long time for canonical closure expansion.
|
||||
// Please let us know if you have a reasonable use case
|
||||
// for a practical Collation rule that needs to increase this limit.
|
||||
// This value is needed for compiling a rule with eight Hangul syllables such as
|
||||
// "&a=b쫊쫊쫊쫊쫊쫊쫊쫊" without error, which should be more than realistic
|
||||
// usage.
|
||||
static private int kClosureLoopLimit = 6560;
|
||||
|
||||
private int addOnlyClosure(CharSequence nfdPrefix, CharSequence nfdString,
|
||||
long[] newCEs, int newCEsLength, int ce32) {
|
||||
// Map from canonically equivalent input to the CEs. (But not from the all-NFD input.)
|
||||
// TODO: make CanonicalIterator work with CharSequence, or maybe change arguments here to String
|
||||
int loop = 0;
|
||||
if(nfdPrefix.length() == 0) {
|
||||
CanonicalIterator stringIter = new CanonicalIterator(nfdString.toString());
|
||||
String prefix = "";
|
||||
|
@ -873,6 +885,9 @@ public final class CollationBuilder extends CollationRuleParser.Sink {
|
|||
String str = stringIter.next();
|
||||
if(str == null) { break; }
|
||||
if(ignoreString(str) || str.contentEquals(nfdString)) { continue; }
|
||||
if (loop++ > kClosureLoopLimit) {
|
||||
throw new ICUInputTooLongException("Too many closure");
|
||||
}
|
||||
ce32 = addIfDifferent(prefix, str, newCEs, newCEsLength, ce32);
|
||||
}
|
||||
} else {
|
||||
|
@ -887,6 +902,9 @@ public final class CollationBuilder extends CollationRuleParser.Sink {
|
|||
String str = stringIter.next();
|
||||
if(str == null) { break; }
|
||||
if(ignoreString(str) || (samePrefix && str.contentEquals(nfdString))) { continue; }
|
||||
if (loop++ > kClosureLoopLimit) {
|
||||
throw new ICUInputTooLongException("Too many closure");
|
||||
}
|
||||
ce32 = addIfDifferent(prefix, str, newCEs, newCEsLength, ce32);
|
||||
}
|
||||
stringIter.reset();
|
||||
|
|
|
@ -1229,6 +1229,20 @@ public class CollationRegressionTest extends TestFmwk {
|
|||
assertTrue("b<a", coll.compare("b", "a") < 0);
|
||||
}
|
||||
|
||||
/**
 * Regression test for ICU-22517.
 * Constructs a RuleBasedCollator from successively longer prefixes of a rule
 * that ends in eight Hangul syllables. Any Exception thrown by the
 * constructor is caught and ignored, so the test presumably only verifies
 * that each constructor call completes (NOTE(review): confirm that is the intent).
 */
@Test
|
||||
public void TestICU22517() {
|
||||
// Treat exhaustiveness levels up to 5 as "quick" mode.
boolean quick = TestFmwk.getExhaustiveness() <= 5;
|
||||
// "&a=b" (4 chars) followed by eight Hangul syllables (1 char each).
String rule = "&a=b쫊쫊쫊쫊쫊쫊쫊쫊";
|
||||
// In quick mode the two longest prefixes are skipped.
int length = quick ? (rule.length()-2) : rule.length();
|
||||
// Start at 4 so the first prefix is "&a=b" with no Hangul syllable; each
// further iteration appends one more syllable to the rule.
for (int i = 4; i <= length; i++) {
|
||||
try {
|
||||
// The collator itself is not used after construction.
RuleBasedCollator coll = new RuleBasedCollator(rule.substring(0, i));
|
||||
} catch (Exception e) {
|
||||
// Deliberately ignored: no exception from the constructor fails this test.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestBeforeWithTooStrongAfter() {
|
||||
// ICU ticket #9959:
|
||||
|
|
Loading…
Add table
Reference in a new issue