From 1f84db837b723cdf9e178e58266a8a62623102d5 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 10 Sep 2020 00:55:26 -0700 Subject: [PATCH 1/3] XPath: Restrict AST depth to prevent stack overflow XPath parser and execution engine isn't stackless; the depth of the query controls the amount of C stack space required. This change instruments places in the parser where the control flow can recurse, requiring too much C stack space to produce an AST, or where a stackless parse is used to produce arbitrarily deep AST which will create issues for downstream processing. As a result XPath parser should now be fuzz safe for malicious inputs. --- src/pugixml.cpp | 54 ++++++++++++++++++++++++++++++++++++-- tests/test_xpath_parse.cpp | 22 ++++++++++++++++ 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 4d0c2c9..34e126f 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -11143,6 +11143,14 @@ PUGI__NS_BEGIN } }; + static const size_t xpath_ast_depth_limit = + #ifdef PUGIXML_XPATH_DEPTH_LIMIT + PUGIXML_XPATH_DEPTH_LIMIT + #else + 4096 + #endif + ; + struct xpath_parser { xpath_allocator* _alloc; @@ -11155,6 +11163,8 @@ PUGI__NS_BEGIN char_t _scratch[32]; + size_t _depth; + xpath_ast_node* error(const char* message) { _result->error = message; @@ -11171,6 +11181,11 @@ PUGI__NS_BEGIN return 0; } + xpath_ast_node* error_rec() + { + return error("Exceeded maximum allowed query depth"); + } + void* alloc_node() { return _alloc->allocate(sizeof(xpath_ast_node)); @@ -11563,10 +11578,15 @@ PUGI__NS_BEGIN xpath_ast_node* n = parse_primary_expression(); if (!n) return 0; + size_t old_depth = _depth; + while (_lexer.current() == lex_open_square_brace) { _lexer.next(); + if (++_depth > xpath_ast_depth_limit) + return error_rec(); + if (n->rettype() != xpath_type_node_set) return error("Predicate has to be applied to node set"); @@ -11582,6 +11602,8 @@ PUGI__NS_BEGIN _lexer.next(); } + _depth = old_depth; + return n; } @@ -11733,12 +11755,17 @@ PUGI__NS_BEGIN xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy); if (!n) return 0; + size_t old_depth = _depth; + xpath_ast_node* last = 0; while (_lexer.current() == lex_open_square_brace) { _lexer.next(); + if (++_depth > xpath_ast_depth_limit) + return error_rec(); + xpath_ast_node* expr = parse_expression(); if (!expr) return 0; @@ -11755,6 +11782,8 @@ PUGI__NS_BEGIN last = pred; } + _depth = old_depth; + return n; } @@ -11764,11 +11793,16 @@ PUGI__NS_BEGIN xpath_ast_node* n = parse_step(set); if (!n) return 0; + size_t old_depth = _depth; + while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) { lexeme_t l = _lexer.current(); _lexer.next(); + if (++_depth > xpath_ast_depth_limit) + return error_rec(); + if (l == lex_double_slash) { n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); @@ -11779,6 +11813,8 @@ PUGI__NS_BEGIN if (!n) return 0; } + _depth = old_depth; + return n; } @@ -11964,6 +12000,9 @@ PUGI__NS_BEGIN { _lexer.next(); + if (++_depth > xpath_ast_depth_limit) + return error_rec(); + xpath_ast_node* rhs = parse_path_or_unary_expression(); if (!rhs) return 0; @@ -12009,13 +12048,22 @@ PUGI__NS_BEGIN // | MultiplicativeExpr 'mod' UnaryExpr xpath_ast_node* parse_expression(int limit = 0) { + size_t old_depth = _depth; + + if (++_depth > xpath_ast_depth_limit) + return error_rec(); + xpath_ast_node* n = parse_path_or_unary_expression(); if (!n) return 0; - return parse_expression_rec(n, limit); + n = parse_expression_rec(n, limit); + + _depth = old_depth; + + return n; } - xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result) + xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0) { } @@ -12024,6 +12072,8 @@ PUGI__NS_BEGIN xpath_ast_node* n = parse_expression(); if (!n) return 0; + assert(_depth == 0); + // check if there are unparsed tokens left if (_lexer.current() != lex_eof) return error("Incorrect query"); diff --git a/tests/test_xpath_parse.cpp b/tests/test_xpath_parse.cpp index daa12bf..9621bd6 100644 --- a/tests/test_xpath_parse.cpp +++ b/tests/test_xpath_parse.cpp @@ -381,6 +381,28 @@ TEST(xpath_parse_oom_propagation) } } +static std::basic_string rep(const std::basic_string& base, size_t count) +{ + std::basic_string result; + result.reserve(base.size() * count); + + for (size_t i = 0; i < count; ++i) + result += base; + + return result; +} + +TEST(xpath_parse_depth_limit) +{ + const size_t limit = 5000; + + CHECK_XPATH_FAIL((rep(STR("("), limit) + "1" + rep(STR(")"), limit)).c_str()); + CHECK_XPATH_FAIL(("(id('a'))" + rep(STR("[1]"), limit)).c_str()); + CHECK_XPATH_FAIL(("/foo" + rep(STR("[1]"), limit)).c_str()); + CHECK_XPATH_FAIL(("/foo" + rep(STR("/x"), limit)).c_str()); + CHECK_XPATH_FAIL(("1" + rep(STR("+1"), limit)).c_str()); +} + TEST_XML(xpath_parse_location_path, "") { CHECK_XPATH_NODESET(doc, STR("/node")) % 2; From c7090e6c3329a4d30f69919186276ac237726368 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 10 Sep 2020 01:07:48 -0700 Subject: [PATCH 2/3] Fix tests in wchar mode --- tests/test_xpath_parse.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_xpath_parse.cpp b/tests/test_xpath_parse.cpp index 9621bd6..89c66a3 100644 --- a/tests/test_xpath_parse.cpp +++ b/tests/test_xpath_parse.cpp @@ -396,11 +396,11 @@ TEST(xpath_parse_depth_limit) { const size_t limit = 5000; - CHECK_XPATH_FAIL((rep(STR("("), limit) + "1" + rep(STR(")"), limit)).c_str()); - CHECK_XPATH_FAIL(("(id('a'))" + rep(STR("[1]"), limit)).c_str()); - CHECK_XPATH_FAIL(("/foo" + rep(STR("[1]"), limit)).c_str()); - CHECK_XPATH_FAIL(("/foo" + rep(STR("/x"), limit)).c_str()); - CHECK_XPATH_FAIL(("1" + rep(STR("+1"), limit)).c_str()); + CHECK_XPATH_FAIL((rep(STR("("), limit) + STR("1") + rep(STR(")"), limit)).c_str()); + CHECK_XPATH_FAIL((STR("(id('a'))") + rep(STR("[1]"), limit)).c_str()); + CHECK_XPATH_FAIL((STR("/foo") + rep(STR("[1]"), limit)).c_str()); + CHECK_XPATH_FAIL((STR("/foo") + rep(STR("/x"), limit)).c_str()); + CHECK_XPATH_FAIL((STR("1") + rep(STR("+1"), limit)).c_str()); } TEST_XML(xpath_parse_location_path, "") From 20aef1cd4b436a8859a54b41cc992db517c61d2d Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 10 Sep 2020 09:11:46 -0700 Subject: [PATCH 3/3] Fix stack overflow in tests on MSVC x64 The default stack on MSVC/x64/debug is sufficient for 1692 nested invocations only, whereas on clang/linux it's ~8K... For now set the limit to be conservative. --- src/pugiconfig.hpp | 3 +++ src/pugixml.cpp | 2 +- tests/test_xpath_parse.cpp | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/pugiconfig.hpp b/src/pugiconfig.hpp index 2fb6918..c9c40e2 100644 --- a/src/pugiconfig.hpp +++ b/src/pugiconfig.hpp @@ -40,6 +40,9 @@ // #define PUGIXML_MEMORY_OUTPUT_STACK 10240 // #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096 +// Tune this constant to adjust max nesting for XPath queries +// #define PUGIXML_XPATH_DEPTH_LIMIT 1024 + // Uncomment this to switch to header-only version // #define PUGIXML_HEADER_ONLY diff --git a/src/pugixml.cpp b/src/pugixml.cpp index 34e126f..e166458 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -11147,7 +11147,7 @@ PUGI__NS_BEGIN #ifdef PUGIXML_XPATH_DEPTH_LIMIT PUGIXML_XPATH_DEPTH_LIMIT #else - 4096 + 1024 #endif ; diff --git a/tests/test_xpath_parse.cpp b/tests/test_xpath_parse.cpp index 89c66a3..d0b0bac 100644 --- a/tests/test_xpath_parse.cpp +++ b/tests/test_xpath_parse.cpp @@ -394,7 +394,7 @@ static std::basic_string rep(const std::basic_string& base, size TEST(xpath_parse_depth_limit) { - const size_t limit = 5000; + const size_t limit = 1500; CHECK_XPATH_FAIL((rep(STR("("), limit) + STR("1") + rep(STR(")"), limit)).c_str()); CHECK_XPATH_FAIL((STR("(id('a'))") + rep(STR("[1]"), limit)).c_str());