From 23110a864d478917c1db97fc988d7bb71bd3372e Mon Sep 17 00:00:00 2001
From: Sebastian Pipping
Date: Wed, 4 Oct 2023 16:21:27 +0200
Subject: [PATCH] Be stricter about macro XML_CONTEXT_BYTES
- Start treating -DXML_CONTEXT_BYTES=0 as "no context"
rather than "context of size 0". Was documented as
"must be set to a positive integer", previously.
- Enforce that macro XML_CONTEXT_BYTES is defined at build time to
avoid accidental misbuilds lacking context in environments that
bypass both of Expats official build systems.
- Detect and reject use of negative context size at compile time.
---
.github/workflows/linux.yml | 2 +-
expat/CMakeLists.txt | 5 ++++-
expat/configure.ac | 7 ++++---
expat/doc/reference.html | 6 +++---
expat/expat_config.h.cmake | 4 ++--
expat/lib/expat.h | 2 +-
expat/lib/xmlparse.c | 31 ++++++++++++++++++-------------
expat/tests/basic_tests.c | 8 ++++----
expat/tests/handlers.c | 2 +-
9 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index a3a0dca2..53441950 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -56,7 +56,7 @@ jobs:
- MODE: qa-sh
FLAT_ENV: CC=clang CXX=clang++ LD=clang++ QA_SANITIZER=address CMAKE_ARGS=-DEXPAT_ATTR_INFO=ON
- MODE: qa-sh
- FLAT_ENV: CC=clang CXX=clang++ LD=clang++ QA_SANITIZER=address CMAKE_ARGS=-DEXPAT_CONTEXT_BYTES=OFF
+ FLAT_ENV: CC=clang CXX=clang++ LD=clang++ QA_SANITIZER=address CMAKE_ARGS=-DEXPAT_CONTEXT_BYTES=0
- MODE: qa-sh
FLAT_ENV: CC=clang CXX=clang++ LD=clang++ QA_SANITIZER=address CMAKE_ARGS=-DEXPAT_DTD=OFF
- MODE: qa-sh
diff --git a/expat/CMakeLists.txt b/expat/CMakeLists.txt
index 87356fe2..ea32106b 100644
--- a/expat/CMakeLists.txt
+++ b/expat/CMakeLists.txt
@@ -139,7 +139,7 @@ if(UNIX OR _EXPAT_HELP)
expat_shy_set(EXPAT_WITH_LIBBSD OFF CACHE BOOL "Utilize libbsd (for arc4random_buf)")
endif()
expat_shy_set(EXPAT_ENABLE_INSTALL ON CACHE BOOL "Install expat files in cmake install target")
-expat_shy_set(EXPAT_CONTEXT_BYTES 1024 CACHE STRING "Define to specify how much context to retain around the current parse point")
+expat_shy_set(EXPAT_CONTEXT_BYTES 1024 CACHE STRING "Define to specify how much context to retain around the current parse point, 0 to disable")
mark_as_advanced(EXPAT_CONTEXT_BYTES)
expat_shy_set(EXPAT_DTD ON CACHE BOOL "Define to make parameter entity parsing functionality available")
mark_as_advanced(EXPAT_DTD)
@@ -287,6 +287,9 @@ _expat_copy_bool_int(EXPAT_NS XML_NS)
if(NOT WIN32)
_expat_copy_bool_int(EXPAT_DEV_URANDOM XML_DEV_URANDOM)
endif()
+if(NOT EXPAT_CONTEXT_BYTES GREATER 0) # in particular with -DEXPAT_CONTEXT_BYTES=OFF
+ set(EXPAT_CONTEXT_BYTES 0)
+endif()
set(XML_CONTEXT_BYTES ${EXPAT_CONTEXT_BYTES})
macro(expat_install)
diff --git a/expat/configure.ac b/expat/configure.ac
index 0902c74e..c9f95bca 100644
--- a/expat/configure.ac
+++ b/expat/configure.ac
@@ -327,9 +327,10 @@ AS_HELP_STRING([--disable-xml-context],
AS_IF([test "x${enable_xml_context}" != "xno"],
[AS_IF([test "x${enable_xml_context}" = "xyes" \
-o "x${enable_xml_context}" = "x"],
- [AS_VAR_SET(enable_xml_context,1024)])
- AC_DEFINE_UNQUOTED([XML_CONTEXT_BYTES], [${enable_xml_context}],
- [Define to specify how much context to retain around the current parse point.])])
+ [AS_VAR_SET(enable_xml_context,1024)])],
+ [AS_VAR_SET(enable_xml_context,0)])
+AC_DEFINE_UNQUOTED([XML_CONTEXT_BYTES], [${enable_xml_context}],
+ [Define to specify how much context to retain around the current parse point, 0 to disable.])
AC_ARG_WITH([docbook],
[AS_HELP_STRING([--with-docbook],
diff --git a/expat/doc/reference.html b/expat/doc/reference.html
index f20dc0f9..2df60bf0 100644
--- a/expat/doc/reference.html
+++ b/expat/doc/reference.html
@@ -404,8 +404,8 @@ off by default.
The number of input bytes of markup context which the parser will
ensure are available for reporting via XML_GetInputContext
. This is
-normally set to 1024, and must be set to a positive integer. If this
-is not defined, the input context will not be available and XML_GetInputContext
will
always report NULL
. Without this, Expat has a smaller memory
footprint and can be faster.
@@ -2136,7 +2136,7 @@ untranslated bytes of the input.
triggering a call spans over a very large amount of input, the actual
parse position may be before the beginning of the buffer.
-If XML_CONTEXT_BYTES
is not defined, this will always
+
If XML_CONTEXT_BYTES
is zero, this will always
return NULL
.
diff --git a/expat/expat_config.h.cmake b/expat/expat_config.h.cmake
index 10b8827b..a290fafb 100644
--- a/expat/expat_config.h.cmake
+++ b/expat/expat_config.h.cmake
@@ -94,8 +94,8 @@
#cmakedefine XML_ATTR_INFO
/* Define to specify how much context to retain around the current parse
- point. */
-#cmakedefine XML_CONTEXT_BYTES @XML_CONTEXT_BYTES@
+ point, 0 to disable. */
+#define XML_CONTEXT_BYTES @XML_CONTEXT_BYTES@
#if ! defined(_WIN32)
/* Define to include code reading entropy from `/dev/urandom'. */
diff --git a/expat/lib/expat.h b/expat/lib/expat.h
index ab20f125..f5f2025e 100644
--- a/expat/lib/expat.h
+++ b/expat/lib/expat.h
@@ -951,7 +951,7 @@ XMLPARSEAPI(XML_Index) XML_GetCurrentByteIndex(XML_Parser parser);
XMLPARSEAPI(int)
XML_GetCurrentByteCount(XML_Parser parser);
-/* If XML_CONTEXT_BYTES is defined, returns the input buffer, sets
+/* If XML_CONTEXT_BYTES is >=1, returns the input buffer, sets
the integer pointed to by offset to the offset within this buffer
of the current parse position, and sets the integer pointed to by size
to the size of this buffer (the number of input bytes). Otherwise
diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c
index 4d386d11..88e1a74c 100644
--- a/expat/lib/xmlparse.c
+++ b/expat/lib/xmlparse.c
@@ -63,6 +63,11 @@
#include "expat_config.h"
+#if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \
+ || (XML_CONTEXT_BYTES + 0 < 0)
+# error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
+#endif
+
#if defined(HAVE_SYSCALL_GETRANDOM)
# if ! defined(_GNU_SOURCE)
# define _GNU_SOURCE 1 /* syscall prototype */
@@ -1905,7 +1910,7 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
parser->m_processor = errorProcessor;
return XML_STATUS_ERROR;
}
-#ifndef XML_CONTEXT_BYTES
+#if XML_CONTEXT_BYTES == 0
else if (parser->m_bufferPtr == parser->m_bufferEnd) {
const char *end;
int nLeftOver;
@@ -1976,7 +1981,7 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
parser->m_eventEndPtr = parser->m_bufferPtr;
return result;
}
-#endif /* not defined XML_CONTEXT_BYTES */
+#endif /* XML_CONTEXT_BYTES == 0 */
else {
void *buff = XML_GetBuffer(parser, len);
if (buff == NULL)
@@ -2072,9 +2077,9 @@ XML_GetBuffer(XML_Parser parser, int len) {
}
if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) {
-#ifdef XML_CONTEXT_BYTES
+#if XML_CONTEXT_BYTES > 0
int keep;
-#endif /* defined XML_CONTEXT_BYTES */
+#endif /* XML_CONTEXT_BYTES > 0 */
/* Do not invoke signed arithmetic overflow: */
int neededSize = (int)((unsigned)len
+ (unsigned)EXPAT_SAFE_PTR_DIFF(
@@ -2083,7 +2088,7 @@ XML_GetBuffer(XML_Parser parser, int len) {
parser->m_errorCode = XML_ERROR_NO_MEMORY;
return NULL;
}
-#ifdef XML_CONTEXT_BYTES
+#if XML_CONTEXT_BYTES > 0
keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
if (keep > XML_CONTEXT_BYTES)
keep = XML_CONTEXT_BYTES;
@@ -2093,10 +2098,10 @@ XML_GetBuffer(XML_Parser parser, int len) {
return NULL;
}
neededSize += keep;
-#endif /* defined XML_CONTEXT_BYTES */
+#endif /* XML_CONTEXT_BYTES > 0 */
if (neededSize
<= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
-#ifdef XML_CONTEXT_BYTES
+#if XML_CONTEXT_BYTES > 0
if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
int offset
= (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
@@ -2117,7 +2122,7 @@ XML_GetBuffer(XML_Parser parser, int len) {
+ EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
parser->m_bufferPtr = parser->m_buffer;
}
-#endif /* not defined XML_CONTEXT_BYTES */
+#endif /* XML_CONTEXT_BYTES > 0 */
} else {
char *newBuf;
int bufferSize
@@ -2138,7 +2143,7 @@ XML_GetBuffer(XML_Parser parser, int len) {
return NULL;
}
parser->m_bufferLim = newBuf + bufferSize;
-#ifdef XML_CONTEXT_BYTES
+#if XML_CONTEXT_BYTES > 0
if (parser->m_bufferPtr) {
memcpy(newBuf, &parser->m_bufferPtr[-keep],
EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
@@ -2168,7 +2173,7 @@ XML_GetBuffer(XML_Parser parser, int len) {
parser->m_bufferEnd = newBuf;
}
parser->m_bufferPtr = parser->m_buffer = newBuf;
-#endif /* not defined XML_CONTEXT_BYTES */
+#endif /* XML_CONTEXT_BYTES > 0 */
}
parser->m_eventPtr = parser->m_eventEndPtr = NULL;
parser->m_positionPtr = NULL;
@@ -2282,7 +2287,7 @@ XML_GetCurrentByteCount(XML_Parser parser) {
const char *XMLCALL
XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
-#ifdef XML_CONTEXT_BYTES
+#if XML_CONTEXT_BYTES > 0
if (parser == NULL)
return NULL;
if (parser->m_eventPtr && parser->m_buffer) {
@@ -2296,7 +2301,7 @@ XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
(void)parser;
(void)offset;
(void)size;
-#endif /* defined XML_CONTEXT_BYTES */
+#endif /* XML_CONTEXT_BYTES > 0 */
return (const char *)0;
}
@@ -2516,7 +2521,7 @@ XML_GetFeatureList(void) {
#ifdef XML_DTD
{XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
#endif
-#ifdef XML_CONTEXT_BYTES
+#if XML_CONTEXT_BYTES > 0
{XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
XML_CONTEXT_BYTES},
#endif
diff --git a/expat/tests/basic_tests.c b/expat/tests/basic_tests.c
index ed2c8f1b..7be42f64 100644
--- a/expat/tests/basic_tests.c
+++ b/expat/tests/basic_tests.c
@@ -2796,7 +2796,7 @@ START_TEST(test_get_buffer_1) {
* between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable,
* with an extra byte just to ensure that the request is off any
* boundary. The request will be inflated internally by
- * XML_CONTEXT_BYTES (if defined), so we subtract that from our
+ * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
* request.
*/
if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
@@ -2832,7 +2832,7 @@ START_TEST(test_get_buffer_2) {
END_TEST
/* Test for signed integer overflow CVE-2022-23852 */
-#if defined(XML_CONTEXT_BYTES)
+#if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow) {
XML_Parser parser = XML_ParserCreate(NULL);
assert(parser != NULL);
@@ -2853,7 +2853,7 @@ START_TEST(test_get_buffer_3_overflow) {
XML_ParserFree(parser);
}
END_TEST
-#endif // defined(XML_CONTEXT_BYTES)
+#endif // XML_CONTEXT_BYTES > 0
/* Test position information macros */
START_TEST(test_byte_info_at_end) {
@@ -5239,7 +5239,7 @@ make_basic_test_case(Suite *s) {
tcase_add_test(tc_basic, test_empty_parse);
tcase_add_test(tc_basic, test_get_buffer_1);
tcase_add_test(tc_basic, test_get_buffer_2);
-#if defined(XML_CONTEXT_BYTES)
+#if XML_CONTEXT_BYTES > 0
tcase_add_test(tc_basic, test_get_buffer_3_overflow);
#endif
tcase_add_test(tc_basic, test_byte_info_at_end);
diff --git a/expat/tests/handlers.c b/expat/tests/handlers.c
index 0169ce52..b8cda26c 100644
--- a/expat/tests/handlers.c
+++ b/expat/tests/handlers.c
@@ -1620,7 +1620,7 @@ rsqb_handler(void *userData, const XML_Char *s, int len) {
void XMLCALL
byte_character_handler(void *userData, const XML_Char *s, int len) {
-#ifdef XML_CONTEXT_BYTES
+#if XML_CONTEXT_BYTES > 0
int offset, size;
const char *buffer;
ByteTestData *data = (ByteTestData *)userData;