ICU-22602 Fix stack overflow inside flattenVariables

Limit the recursive call of flattenVariables to maximum depth 3500
since Java on my machine throw stack overflow exception around 3900.
This commit is contained in:
Frank Tang 2023-12-12 15:45:58 -08:00 committed by Frank Yung-Fong Tang
parent 99f6be4345
commit 19af9e7ce3
8 changed files with 55 additions and 10 deletions

View file

@ -239,7 +239,17 @@ RBBINode *RBBINode::cloneTree() {
// nested references are handled by cloneTree(), not here.
//
//-------------------------------------------------------------------------
RBBINode *RBBINode::flattenVariables() {
constexpr int kRecursiveDepthLimit = 3500;
RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) {
if (U_FAILURE(status)) {
return this;
}
// If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR
// to avoid stack overflow crash.
if (depth > kRecursiveDepthLimit) {
status = U_INPUT_TOO_LONG_ERROR;
return this;
}
if (fType == varRef) {
RBBINode *retNode = fLeftChild->cloneTree();
if (retNode != nullptr) {
@ -251,11 +261,11 @@ RBBINode *RBBINode::flattenVariables() {
}
if (fLeftChild != nullptr) {
fLeftChild = fLeftChild->flattenVariables();
fLeftChild = fLeftChild->flattenVariables(status, depth+1);
fLeftChild->fParent = this;
}
if (fRightChild != nullptr) {
fRightChild = fRightChild->flattenVariables();
fRightChild = fRightChild->flattenVariables(status, depth+1);
fRightChild->fParent = this;
}
return this;

View file

@ -97,7 +97,7 @@ class RBBINode : public UMemory {
static void NRDeleteNode(RBBINode *node);
RBBINode *cloneTree();
RBBINode *flattenVariables();
RBBINode *flattenVariables(UErrorCode &status, int depth=0);
void flattenSets();
void findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status);

View file

@ -81,7 +81,10 @@ void RBBITableBuilder::buildForwardTable() {
// Walk through the tree, replacing any references to $variables with a copy of the
// parse tree for the substitution expression.
//
fTree = fTree->flattenVariables();
fTree = fTree->flattenVariables(*fStatus, 0);
if (U_FAILURE(*fStatus)) {
return;
}
#ifdef RBBI_DEBUG
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) {
RBBIDebugPuts("\nParse tree after flattening variable references.");

View file

@ -148,6 +148,7 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
TESTCASE_AUTO(TestBug22581);
TESTCASE_AUTO(TestBug22584);
TESTCASE_AUTO(TestBug22585);
TESTCASE_AUTO(TestBug22602);
#if U_ENABLE_TRACING
TESTCASE_AUTO(TestTraceCreateCharacter);
@ -5879,6 +5880,15 @@ void RBBITest::TestBug22585() {
RuleBasedBreakIterator bi2(rule, pe, ec);
}
// Test a long string with a ; in the end will not cause stack overflow.
void RBBITest::TestBug22602() {
UnicodeString rule(25000, (UChar32)'A', 25000-1);
rule.append(u";");
UParseError pe {};
UErrorCode ec {U_ZERO_ERROR};
RuleBasedBreakIterator bi(rule, pe, ec);
}
void RBBITest::TestBug22584() {
// Creating a break iterator from a rule consisting of a very long
// literal input string caused a stack overflow when deleting the

View file

@ -102,6 +102,7 @@ public:
void TestBug22581();
void TestBug22584();
void TestBug22585();
void TestBug22602();
#if U_ENABLE_TRACING
void TestTraceCreateCharacter();

View file

@ -176,7 +176,6 @@ class RBBINode {
}
//-------------------------------------------------------------------------
//
// flattenVariables Walk a parse tree, replacing any variable
@ -195,7 +194,11 @@ class RBBINode {
// nested references are handled by cloneTree(), not here.
//
//-------------------------------------------------------------------------
RBBINode flattenVariables() {
static final private int kRecursiveDepthLimit = 3500;
RBBINode flattenVariables(int depth) {
if (depth > kRecursiveDepthLimit) {
throw new IllegalArgumentException("The input is too long");
}
if (fType == varRef) {
RBBINode retNode = fLeftChild.cloneTree();
retNode.fRuleRoot = this.fRuleRoot;
@ -204,11 +207,11 @@ class RBBINode {
}
if (fLeftChild != null) {
fLeftChild = fLeftChild.flattenVariables();
fLeftChild = fLeftChild.flattenVariables(depth+1);
fLeftChild.fParent = this;
}
if (fRightChild != null) {
fRightChild = fRightChild.flattenVariables();
fRightChild = fRightChild.flattenVariables(depth+1);
fRightChild.fParent = this;
}
return this;

View file

@ -116,7 +116,7 @@ class RBBITableBuilder {
// Walk through the tree, replacing any references to $variables with a copy of the
// parse tree for the substitution expression.
//
fRB.fTreeRoots[fRootIx] = fRB.fTreeRoots[fRootIx].flattenVariables();
fRB.fTreeRoots[fRootIx] = fRB.fTreeRoots[fRootIx].flattenVariables(0);
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("ftree")>=0) {
System.out.println("Parse tree after flattening variable references.");
fRB.fTreeRoots[fRootIx].printTree(true);

View file

@ -10,6 +10,7 @@ package com.ibm.icu.dev.test.rbbi;
import java.text.CharacterIterator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
@ -984,6 +985,23 @@ public class RBBITest extends CoreTestFmwk {
fail("TestBug22585: Unexpected exception while new RuleBasedBreakIterator() with unpaired low surrogate: " + e);
}
}
@Test
public void TestBug22602() {
try {
char[] charArray = new char[25000];
Arrays.fill(charArray, 'A');
charArray[charArray.length-1] = ';';
String rules = new String(charArray);
RuleBasedBreakIterator bi = new RuleBasedBreakIterator(rules);
fail("TestBug22602: RuleBasedBreakIterator() failed to throw an exception with a long string followed by a ';'.");
}
catch (IllegalArgumentException e) {
// expected exception with a long string followed by a ';'.
}
catch(StackOverflowError e) {
fail("TestBug22602: Unexpected exception while new RuleBasedBreakIterator() with a long string followed by a ';': " + e);
}
}
/* Test preceding(index) and following(index), with semi-random indexes.
* The random indexes are produced in clusters that are relatively closely spaced,
* to increase the occurrences of hits to the internal break cache.