ICU-22584 Fix RBBI rule builder stack overflow.

The problem was found by fuzz testing.

A rule consisting of a long literal string produces a large, unbalanced parse tree,
one node per string element. Deleting the tree was recursive, once per node, resulting
in deep recursion.

This PR changes node deletion to use an iterative (non-recursive) approach.

This change only affects rule building. There is no change to the RBBI run time
using pre-built rules.
This commit is contained in:
Andy Heninger 2023-12-05 20:53:00 -08:00 committed by Frank Yung-Fong Tang
parent da83309900
commit e6892996b1
4 changed files with 66 additions and 4 deletions

View file

@ -123,19 +123,64 @@ RBBINode::~RBBINode() {
break;
default:
delete fLeftChild;
// Avoid using a recursive implementation because of stack overflow problems.
// See bug ICU-22584.
// delete fLeftChild;
NRDeleteNode(fLeftChild);
fLeftChild = nullptr;
delete fRightChild;
// delete fRightChild;
NRDeleteNode(fRightChild);
fRightChild = nullptr;
}
delete fFirstPosSet;
delete fLastPosSet;
delete fFollowPos;
}
/**
* Non-recursive delete of a node + its children. Used from the node destructor
* instead of the more obvious recursive implementation to avoid problems with
* stack overflow with some perverse test rule data (from fuzzing).
*/
void RBBINode::NRDeleteNode(RBBINode *node) {
if (node == nullptr) {
return;
}
RBBINode *stopNode = node->fParent;
RBBINode *nextNode = node;
while (nextNode != stopNode) {
RBBINode *currentNode = nextNode;
if ((currentNode->fLeftChild == nullptr && currentNode->fRightChild == nullptr) ||
currentNode->fType == varRef || // varRef and setRef nodes do not
currentNode->fType == setRef) { // own their children nodes.
// CurrentNode is effectively a leaf node; it's safe to go ahead and delete it.
nextNode = currentNode->fParent;
if (nextNode->fLeftChild == currentNode) {
nextNode->fLeftChild = nullptr;
} else if (nextNode->fRightChild == currentNode) {
nextNode->fRightChild = nullptr;
}
delete currentNode;
} else if (currentNode->fLeftChild) {
nextNode = currentNode->fLeftChild;
if (nextNode->fParent == nullptr) {
nextNode->fParent = currentNode;
// fParent isn't always set; do it now if not.
}
U_ASSERT(nextNode->fParent == currentNode);
} else if (currentNode->fRightChild) {
nextNode = currentNode->fRightChild;
if (nextNode->fParent == nullptr) {
nextNode->fParent = currentNode;
// fParent isn't always set; do it now if not.
}
U_ASSERT(nextNode->fParent == currentNode);
}
}
}
//-------------------------------------------------------------------------
//

View file

@ -94,6 +94,7 @@ class RBBINode : public UMemory {
RBBINode(NodeType t);
RBBINode(const RBBINode &other);
~RBBINode();
static void NRDeleteNode(RBBINode *node);
RBBINode *cloneTree();
RBBINode *flattenVariables();

View file

@ -144,6 +144,7 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
TESTCASE_AUTO(TestRandomAccess);
TESTCASE_AUTO(TestExternalBreakEngineWithFakeTaiLe);
TESTCASE_AUTO(TestExternalBreakEngineWithFakeYue);
TESTCASE_AUTO(TestBug22584);
#if U_ENABLE_TRACING
TESTCASE_AUTO(TestTraceCreateCharacter);
@ -5858,4 +5859,18 @@ void RBBITest::TestExternalBreakEngineWithFakeTaiLe() {
expected2 == actual2);
}
void RBBITest::TestBug22584() {
// Creating a break iterator from a rule consisting of a very long
// literal input string caused a stack overflow when deleting the
// parse tree for the input during the rule building process.
// Failure of this test showed as a crash during the break iterator construction.
UnicodeString ruleStr(100000, (UChar32)0, 100000);
UParseError pe {};
UErrorCode ec {U_ZERO_ERROR};
RuleBasedBreakIterator bi(ruleStr, pe, ec);
}
#endif // #if !UCONFIG_NO_BREAK_ITERATION

View file

@ -98,6 +98,7 @@ public:
void TestRandomAccess();
void TestExternalBreakEngineWithFakeTaiLe();
void TestExternalBreakEngineWithFakeYue();
void TestBug22584();
#if U_ENABLE_TRACING
void TestTraceCreateCharacter();