mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-04 21:15:35 +00:00
ICU-22555 Fix infinity loop in RuleBasedCollator constructor
Fix C++ and Java code. Add unit tests for both C++ and Java.
This commit is contained in:
parent
8d3d214ad7
commit
757d2cd90a
6 changed files with 80 additions and 5 deletions
|
@ -264,10 +264,19 @@ CleanPartialInitialization:
|
|||
* @param source the string to find permutations for
|
||||
* @return the results in a set.
|
||||
*/
|
||||
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
|
||||
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth) {
|
||||
if(U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
// To avoid infinity loop caused by permute, we limit the depth of recursive
|
||||
// call to permute and return U_UNSUPPORTED_ERROR.
|
||||
// We know in some unit test we need at least 4. Set to 8 just in case some
|
||||
// unforseen use cases.
|
||||
constexpr int32_t kPermuteDepthLimit = 8;
|
||||
if (depth > kPermuteDepthLimit) {
|
||||
status = U_UNSUPPORTED_ERROR;
|
||||
return;
|
||||
}
|
||||
//if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
|
||||
int32_t i = 0;
|
||||
|
||||
|
@ -311,7 +320,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
|
|||
|
||||
// see what the permutations of the characters before and after this one are
|
||||
//Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
|
||||
permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status);
|
||||
permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status, depth+1);
|
||||
/* Test for buffer overflows */
|
||||
if(U_FAILURE(status)) {
|
||||
return;
|
||||
|
|
|
@ -128,9 +128,10 @@ public:
|
|||
* @param skipZeros determine if skip zeros
|
||||
* @param result the results in a set.
|
||||
* @param status Fill-in parameter which receives the status of this operation.
|
||||
* @param depth depth of the call.
|
||||
* @internal
|
||||
*/
|
||||
static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
|
||||
static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth=0);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/**
|
||||
|
|
|
@ -1249,6 +1249,25 @@ void CollationRegressionTest::TestBeforeWithTooStrongAfter() {
|
|||
}
|
||||
}
|
||||
|
||||
void CollationRegressionTest::TestICU22555InfinityLoop() {
|
||||
char16_t data[] = {
|
||||
0x0020, 0x0026, 0x4000, 0x002c, 0x6601, 0x0106, 0xff7f, 0xff99,
|
||||
0x003b, 0x1141, 0x106a, 0x1006, 0x0001, 0x0080, 0x1141, 0x106a,
|
||||
0x0026, 0x00ff, 0xff6f, 0xff99, 0x013b, 0x1141, 0x1067, 0x1026,
|
||||
0x0601, 0x0080, 0x5f03, 0x17e3, 0x0000, 0x3e00, 0x3e3e, 0x0055,
|
||||
0x8080, 0x0000, 0x01e4, 0x0000, 0x0300, 0x003d, 0x4cff, 0x8053,
|
||||
0x7a65, 0x0000, 0x6400, 0x5f00, 0x0150, 0x9090, 0x9090, 0x2f5f,
|
||||
0x0053, 0xffe4, 0x002c, 0x0300, 0x1f3d, 0x55f7, 0x8053, 0x1750,
|
||||
0x3d00, 0xff00, 0x00ff, 0xff6f, 0x0099, 0x03fa, 0x0303, 0x0303,
|
||||
0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303,
|
||||
0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303,
|
||||
};
|
||||
icu::UnicodeString rule(false, data, sizeof(data)/sizeof(char16_t));
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
icu::LocalPointer<icu::RuleBasedCollator> col1(
|
||||
new icu::RuleBasedCollator(rule, status));
|
||||
}
|
||||
|
||||
void CollationRegressionTest::TestICU22517() {
|
||||
IcuTestErrorCode errorCode(*this, "TestICU22517");
|
||||
char16_t data[] = u"&a=b쫊쫊쫊쫊쫊쫊쫊쫊";
|
||||
|
@ -1421,6 +1440,7 @@ void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const ch
|
|||
TESTCASE_AUTO(TestBeforeWithTooStrongAfter);
|
||||
TESTCASE_AUTO(TestICU22277);
|
||||
TESTCASE_AUTO(TestICU22517);
|
||||
TESTCASE_AUTO(TestICU22555InfinityLoop);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
|
|
@ -242,6 +242,8 @@ public:
|
|||
|
||||
void TestICU22517();
|
||||
|
||||
void TestICU22555InfinityLoop();
|
||||
|
||||
private:
|
||||
//------------------------------------------------------------------------
|
||||
// Internal utilities
|
||||
|
|
|
@ -1243,6 +1243,27 @@ public class CollationRegressionTest extends TestFmwk {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestICU22555InfinityLoop() {
|
||||
char data[] = {
|
||||
0x0020, 0x0026, 0x4000, 0x002c, 0x6601, 0x0106, 0xff7f, 0xff99,
|
||||
0x003b, 0x1141, 0x106a, 0x1006, 0x0001, 0x0080, 0x1141, 0x106a,
|
||||
0x0026, 0x00ff, 0xff6f, 0xff99, 0x013b, 0x1141, 0x1067, 0x1026,
|
||||
0x0601, 0x0080, 0x5f03, 0x17e3, 0x0000, 0x3e00, 0x3e3e, 0x0055,
|
||||
0x8080, 0x0000, 0x01e4, 0x0000, 0x0300, 0x003d, 0x4cff, 0x8053,
|
||||
0x7a65, 0x0000, 0x6400, 0x5f00, 0x0150, 0x9090, 0x9090, 0x2f5f,
|
||||
0x0053, 0xffe4, 0x002c, 0x0300, 0x1f3d, 0x55f7, 0x8053, 0x1750,
|
||||
0x3d00, 0xff00, 0x00ff, 0xff6f, 0x0099, 0x03fa, 0x0303, 0x0303,
|
||||
0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303,
|
||||
0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303,
|
||||
};
|
||||
String rule = new String(data, 0, data.length);
|
||||
try {
|
||||
RuleBasedCollator coll = new RuleBasedCollator(rule);
|
||||
} catch(Exception expected) {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestBeforeWithTooStrongAfter() {
|
||||
// ICU ticket #9959:
|
||||
|
|
|
@ -154,6 +154,10 @@ public final class CanonicalIterator {
|
|||
}
|
||||
}
|
||||
|
||||
// To avoid infinity loop caused by permute, we limit the depth of recursive
|
||||
// call to permute and throw exception.
|
||||
// We know in some unit test we need at least 4. Set to 8 just in case some
|
||||
// unforseen use cases.
|
||||
/**
|
||||
* Simple implementation of permutation.
|
||||
* <br><b>Warning: The strings are not guaranteed to be in any particular order.</b>
|
||||
|
@ -165,9 +169,27 @@ public final class CanonicalIterator {
|
|||
*/
|
||||
@Deprecated
|
||||
public static void permute(String source, boolean skipZeros, Set<String> output) {
|
||||
permute(source, skipZeros, output, 0);
|
||||
}
|
||||
|
||||
private static int PERMUTE_DEPTH_LIMIT = 8;
|
||||
/**
|
||||
* Simple implementation of permutation.
|
||||
* <br><b>Warning: The strings are not guaranteed to be in any particular order.</b>
|
||||
* @param source the string to find permutations for
|
||||
* @param skipZeros set to true to skip characters with canonical combining class zero
|
||||
* @param output the set to add the results to
|
||||
* @param depth the depth of the recursive call.
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
*/
|
||||
@Deprecated
|
||||
private static void permute(String source, boolean skipZeros, Set<String> output, int depth) {
|
||||
// TODO: optimize
|
||||
//if (PROGRESS) System.out.println("Permute: " + source);
|
||||
|
||||
if (depth > PERMUTE_DEPTH_LIMIT) {
|
||||
throw new UnsupportedOperationException("Stack too deep:" + depth);
|
||||
}
|
||||
// optimization:
|
||||
// if zero or one character, just return a set with it
|
||||
// we check for length < 2 to keep from counting code points all the time
|
||||
|
@ -193,7 +215,7 @@ public final class CanonicalIterator {
|
|||
// see what the permutations of the characters before and after this one are
|
||||
subpermute.clear();
|
||||
permute(source.substring(0,i)
|
||||
+ source.substring(i + UTF16.getCharCount(cp)), skipZeros, subpermute);
|
||||
+ source.substring(i + UTF16.getCharCount(cp)), skipZeros, subpermute, depth+1);
|
||||
|
||||
// prefix this character to all of them
|
||||
String chStr = UTF16.valueOf(source, i);
|
||||
|
|
Loading…
Add table
Reference in a new issue