XPath: Restrict AST depth to prevent stack overflow
XPath parser and execution engine isn't stackless; the depth of the query controls the amount of C stack space required. This change instruments places in the parser where the control flow can recurse, requiring too much C stack space to produce an AST, or where a stackless parse is used to produce arbitrarily deep AST which will create issues for downstream processing. As a result XPath parser should now be fuzz safe for malicious inputs.
This commit is contained in:
parent
22401bafaf
commit
1f84db837b
2 changed files with 74 additions and 2 deletions
|
@ -11143,6 +11143,14 @@ PUGI__NS_BEGIN
|
|||
}
|
||||
};
|
||||
|
||||
static const size_t xpath_ast_depth_limit =
|
||||
#ifdef PUGIXML_XPATH_DEPTH_LIMIT
|
||||
PUGIXML_XPATH_DEPTH_LIMIT
|
||||
#else
|
||||
4096
|
||||
#endif
|
||||
;
|
||||
|
||||
struct xpath_parser
|
||||
{
|
||||
xpath_allocator* _alloc;
|
||||
|
@ -11155,6 +11163,8 @@ PUGI__NS_BEGIN
|
|||
|
||||
char_t _scratch[32];
|
||||
|
||||
size_t _depth;
|
||||
|
||||
xpath_ast_node* error(const char* message)
|
||||
{
|
||||
_result->error = message;
|
||||
|
@ -11171,6 +11181,11 @@ PUGI__NS_BEGIN
|
|||
return 0;
|
||||
}
|
||||
|
||||
xpath_ast_node* error_rec()
|
||||
{
|
||||
return error("Exceeded maximum allowed query depth");
|
||||
}
|
||||
|
||||
void* alloc_node()
|
||||
{
|
||||
return _alloc->allocate(sizeof(xpath_ast_node));
|
||||
|
@ -11563,10 +11578,15 @@ PUGI__NS_BEGIN
|
|||
xpath_ast_node* n = parse_primary_expression();
|
||||
if (!n) return 0;
|
||||
|
||||
size_t old_depth = _depth;
|
||||
|
||||
while (_lexer.current() == lex_open_square_brace)
|
||||
{
|
||||
_lexer.next();
|
||||
|
||||
if (++_depth > xpath_ast_depth_limit)
|
||||
return error_rec();
|
||||
|
||||
if (n->rettype() != xpath_type_node_set)
|
||||
return error("Predicate has to be applied to node set");
|
||||
|
||||
|
@ -11582,6 +11602,8 @@ PUGI__NS_BEGIN
|
|||
_lexer.next();
|
||||
}
|
||||
|
||||
_depth = old_depth;
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
|
@ -11733,12 +11755,17 @@ PUGI__NS_BEGIN
|
|||
xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy);
|
||||
if (!n) return 0;
|
||||
|
||||
size_t old_depth = _depth;
|
||||
|
||||
xpath_ast_node* last = 0;
|
||||
|
||||
while (_lexer.current() == lex_open_square_brace)
|
||||
{
|
||||
_lexer.next();
|
||||
|
||||
if (++_depth > xpath_ast_depth_limit)
|
||||
return error_rec();
|
||||
|
||||
xpath_ast_node* expr = parse_expression();
|
||||
if (!expr) return 0;
|
||||
|
||||
|
@ -11755,6 +11782,8 @@ PUGI__NS_BEGIN
|
|||
last = pred;
|
||||
}
|
||||
|
||||
_depth = old_depth;
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
|
@ -11764,11 +11793,16 @@ PUGI__NS_BEGIN
|
|||
xpath_ast_node* n = parse_step(set);
|
||||
if (!n) return 0;
|
||||
|
||||
size_t old_depth = _depth;
|
||||
|
||||
while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
|
||||
{
|
||||
lexeme_t l = _lexer.current();
|
||||
_lexer.next();
|
||||
|
||||
if (++_depth > xpath_ast_depth_limit)
|
||||
return error_rec();
|
||||
|
||||
if (l == lex_double_slash)
|
||||
{
|
||||
n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
|
||||
|
@ -11779,6 +11813,8 @@ PUGI__NS_BEGIN
|
|||
if (!n) return 0;
|
||||
}
|
||||
|
||||
_depth = old_depth;
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
|
@ -11964,6 +12000,9 @@ PUGI__NS_BEGIN
|
|||
{
|
||||
_lexer.next();
|
||||
|
||||
if (++_depth > xpath_ast_depth_limit)
|
||||
return error_rec();
|
||||
|
||||
xpath_ast_node* rhs = parse_path_or_unary_expression();
|
||||
if (!rhs) return 0;
|
||||
|
||||
|
@ -12009,13 +12048,22 @@ PUGI__NS_BEGIN
|
|||
// | MultiplicativeExpr 'mod' UnaryExpr
|
||||
xpath_ast_node* parse_expression(int limit = 0)
|
||||
{
|
||||
size_t old_depth = _depth;
|
||||
|
||||
if (++_depth > xpath_ast_depth_limit)
|
||||
return error_rec();
|
||||
|
||||
xpath_ast_node* n = parse_path_or_unary_expression();
|
||||
if (!n) return 0;
|
||||
|
||||
return parse_expression_rec(n, limit);
|
||||
n = parse_expression_rec(n, limit);
|
||||
|
||||
_depth = old_depth;
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
|
||||
xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -12024,6 +12072,8 @@ PUGI__NS_BEGIN
|
|||
xpath_ast_node* n = parse_expression();
|
||||
if (!n) return 0;
|
||||
|
||||
assert(_depth == 0);
|
||||
|
||||
// check if there are unparsed tokens left
|
||||
if (_lexer.current() != lex_eof)
|
||||
return error("Incorrect query");
|
||||
|
|
|
@ -381,6 +381,28 @@ TEST(xpath_parse_oom_propagation)
|
|||
}
|
||||
}
|
||||
|
||||
static std::basic_string<char_t> rep(const std::basic_string<char_t>& base, size_t count)
|
||||
{
|
||||
std::basic_string<char_t> result;
|
||||
result.reserve(base.size() * count);
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
result += base;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
TEST(xpath_parse_depth_limit)
|
||||
{
|
||||
const size_t limit = 5000;
|
||||
|
||||
CHECK_XPATH_FAIL((rep(STR("("), limit) + "1" + rep(STR(")"), limit)).c_str());
|
||||
CHECK_XPATH_FAIL(("(id('a'))" + rep(STR("[1]"), limit)).c_str());
|
||||
CHECK_XPATH_FAIL(("/foo" + rep(STR("[1]"), limit)).c_str());
|
||||
CHECK_XPATH_FAIL(("/foo" + rep(STR("/x"), limit)).c_str());
|
||||
CHECK_XPATH_FAIL(("1" + rep(STR("+1"), limit)).c_str());
|
||||
}
|
||||
|
||||
TEST_XML(xpath_parse_location_path, "<node><child/></node>")
|
||||
{
|
||||
CHECK_XPATH_NODESET(doc, STR("/node")) % 2;
|
||||
|
|
Loading…
Add table
Reference in a new issue