mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-22584 Fix RBBI rule builder stack overflow.
The problem was found by fuzz testing. A rule consisting of a long literal string produces a large, unbalanced parse tree, one node per string element. Deleting the tree was recursive, once per node, resulting in deep recursion. This PR changes node deletion to use an iterative (non-recursive) approach. This change only affects rule building. There is no change to the RBBI run time using pre-built rules.
This commit is contained in:
parent
da83309900
commit
e6892996b1
4 changed files with 66 additions and 4 deletions
|
@ -123,19 +123,64 @@ RBBINode::~RBBINode() {
|
|||
break;
|
||||
|
||||
default:
|
||||
delete fLeftChild;
|
||||
// Avoid using a recursive implementation because of stack overflow problems.
|
||||
// See bug ICU-22584.
|
||||
// delete fLeftChild;
|
||||
NRDeleteNode(fLeftChild);
|
||||
fLeftChild = nullptr;
|
||||
delete fRightChild;
|
||||
// delete fRightChild;
|
||||
NRDeleteNode(fRightChild);
|
||||
fRightChild = nullptr;
|
||||
}
|
||||
|
||||
|
||||
delete fFirstPosSet;
|
||||
delete fLastPosSet;
|
||||
delete fFollowPos;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Non-recursive delete of a node + its children. Used from the node destructor
|
||||
* instead of the more obvious recursive implementation to avoid problems with
|
||||
* stack overflow with some perverse test rule data (from fuzzing).
|
||||
*/
|
||||
void RBBINode::NRDeleteNode(RBBINode *node) {
|
||||
if (node == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
RBBINode *stopNode = node->fParent;
|
||||
RBBINode *nextNode = node;
|
||||
while (nextNode != stopNode) {
|
||||
RBBINode *currentNode = nextNode;
|
||||
|
||||
if ((currentNode->fLeftChild == nullptr && currentNode->fRightChild == nullptr) ||
|
||||
currentNode->fType == varRef || // varRef and setRef nodes do not
|
||||
currentNode->fType == setRef) { // own their children nodes.
|
||||
// CurrentNode is effectively a leaf node; it's safe to go ahead and delete it.
|
||||
nextNode = currentNode->fParent;
|
||||
if (nextNode->fLeftChild == currentNode) {
|
||||
nextNode->fLeftChild = nullptr;
|
||||
} else if (nextNode->fRightChild == currentNode) {
|
||||
nextNode->fRightChild = nullptr;
|
||||
}
|
||||
delete currentNode;
|
||||
} else if (currentNode->fLeftChild) {
|
||||
nextNode = currentNode->fLeftChild;
|
||||
if (nextNode->fParent == nullptr) {
|
||||
nextNode->fParent = currentNode;
|
||||
// fParent isn't always set; do it now if not.
|
||||
}
|
||||
U_ASSERT(nextNode->fParent == currentNode);
|
||||
} else if (currentNode->fRightChild) {
|
||||
nextNode = currentNode->fRightChild;
|
||||
if (nextNode->fParent == nullptr) {
|
||||
nextNode->fParent = currentNode;
|
||||
// fParent isn't always set; do it now if not.
|
||||
}
|
||||
U_ASSERT(nextNode->fParent == currentNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
//
|
||||
|
|
|
@ -94,6 +94,7 @@ class RBBINode : public UMemory {
|
|||
RBBINode(NodeType t);
|
||||
RBBINode(const RBBINode &other);
|
||||
~RBBINode();
|
||||
static void NRDeleteNode(RBBINode *node);
|
||||
|
||||
RBBINode *cloneTree();
|
||||
RBBINode *flattenVariables();
|
||||
|
|
|
@ -144,6 +144,7 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
|
|||
TESTCASE_AUTO(TestRandomAccess);
|
||||
TESTCASE_AUTO(TestExternalBreakEngineWithFakeTaiLe);
|
||||
TESTCASE_AUTO(TestExternalBreakEngineWithFakeYue);
|
||||
TESTCASE_AUTO(TestBug22584);
|
||||
|
||||
#if U_ENABLE_TRACING
|
||||
TESTCASE_AUTO(TestTraceCreateCharacter);
|
||||
|
@ -5858,4 +5859,18 @@ void RBBITest::TestExternalBreakEngineWithFakeTaiLe() {
|
|||
expected2 == actual2);
|
||||
}
|
||||
|
||||
void RBBITest::TestBug22584() {
|
||||
// Creating a break iterator from a rule consisting of a very long
|
||||
// literal input string caused a stack overflow when deleting the
|
||||
// parse tree for the input during the rule building process.
|
||||
|
||||
// Failure of this test showed as a crash during the break iterator construction.
|
||||
|
||||
UnicodeString ruleStr(100000, (UChar32)0, 100000);
|
||||
UParseError pe {};
|
||||
UErrorCode ec {U_ZERO_ERROR};
|
||||
|
||||
RuleBasedBreakIterator bi(ruleStr, pe, ec);
|
||||
}
|
||||
|
||||
#endif // #if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
|
|
@ -98,6 +98,7 @@ public:
|
|||
void TestRandomAccess();
|
||||
void TestExternalBreakEngineWithFakeTaiLe();
|
||||
void TestExternalBreakEngineWithFakeYue();
|
||||
void TestBug22584();
|
||||
|
||||
#if U_ENABLE_TRACING
|
||||
void TestTraceCreateCharacter();
|
||||
|
|
Loading…
Add table
Reference in a new issue