From d248e7e9234927e32fd4cac77d743ece53086965 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Thu, 28 Sep 2023 15:56:42 +0200 Subject: [PATCH] xmlwf: Support custom buffer size for XML_GetBuffer and read --- expat/xmlwf/xmlfile.c | 10 +++------- expat/xmlwf/xmlfile.h | 2 ++ expat/xmlwf/xmlwf.c | 21 +++++++++++++++++++++ expat/xmlwf/xmlwf_helpgen.py | 1 + 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/expat/xmlwf/xmlfile.c b/expat/xmlwf/xmlfile.c index c55e3417..0cb7e1a2 100644 --- a/expat/xmlwf/xmlfile.c +++ b/expat/xmlwf/xmlfile.c @@ -71,11 +71,7 @@ # endif #endif -#ifdef _DEBUG -# define READ_SIZE 16 -#else -# define READ_SIZE (1024 * 8) -#endif +int g_read_size_bytes = 1024 * 8; typedef struct { XML_Parser parser; @@ -195,7 +191,7 @@ processStream(const XML_Char *filename, XML_Parser parser) { } for (;;) { int nread; - char *buf = (char *)XML_GetBuffer(parser, READ_SIZE); + char *buf = (char *)XML_GetBuffer(parser, g_read_size_bytes); if (! buf) { if (filename != NULL) close(fd); @@ -203,7 +199,7 @@ processStream(const XML_Char *filename, XML_Parser parser) { filename != NULL ? filename : T("xmlwf")); return 0; } - nread = read(fd, buf, READ_SIZE); + nread = read(fd, buf, g_read_size_bytes); if (nread < 0) { tperror(filename != NULL ? filename : T("STDIN")); if (filename != NULL) diff --git a/expat/xmlwf/xmlfile.h b/expat/xmlwf/xmlfile.h index d75dda22..014f1319 100644 --- a/expat/xmlwf/xmlfile.h +++ b/expat/xmlwf/xmlfile.h @@ -42,5 +42,7 @@ # define XML_FMT_INT_MOD "l" #endif +extern int g_read_size_bytes; + extern int XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags); diff --git a/expat/xmlwf/xmlwf.c b/expat/xmlwf/xmlwf.c index 276ddbb5..c84fcd77 100644 --- a/expat/xmlwf/xmlwf.c +++ b/expat/xmlwf/xmlwf.c @@ -899,6 +899,7 @@ usage(const XML_Char *prog, int rc) { T(" -e ENCODING override any in-document [e]ncoding declaration\n") T(" -w enable support for [W]indows code pages\n") T(" -r disable memory-mapping and use [r]ead calls instead\n") + T(" -g BYTES buffer size to request per call pair to XML_[G]etBuffer and read (default: 8 KiB)\n") T(" -k when processing multiple files, [k]eep processing after first file with error\n") T("\n") T("output control arguments:\n") @@ -1042,6 +1043,26 @@ tmain(int argc, XML_Char **argv) { case T('v'): showVersion(argv[0]); return 0; + case T('g'): { + const XML_Char *valueText = NULL; + XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j); + + errno = 0; + XML_Char *afterValueText = (XML_Char *)valueText; + const long long read_size_bytes_candidate + = tcstoull(valueText, &afterValueText, 10); + if ((errno != 0) || (afterValueText[0] != T('\0')) + || (read_size_bytes_candidate < 1) + || (read_size_bytes_candidate > (INT_MAX / 2 + 1))) { + // This prevents tperror(..) from reporting misleading "[..]: Success" + errno = ERANGE; + tperror(T("invalid buffer size") T( + " (needs an integer from 1 to INT_MAX/2+1 i.e. 1,073,741,824 on most platforms)")); + exit(XMLWF_EXIT_USAGE_ERROR); + } + g_read_size_bytes = (int)read_size_bytes_candidate; + break; + } case T('k'): continueOnError = 1; j++; diff --git a/expat/xmlwf/xmlwf_helpgen.py b/expat/xmlwf/xmlwf_helpgen.py index 706b3c57..c9ac9a1e 100755 --- a/expat/xmlwf/xmlwf_helpgen.py +++ b/expat/xmlwf/xmlwf_helpgen.py @@ -63,6 +63,7 @@ input_related.add_argument('-x', action='store_true', help='enable processing of input_related.add_argument('-e', action='store', metavar='ENCODING', help='override any in-document [e]ncoding declaration') input_related.add_argument('-w', action='store_true', help='enable support for [W]indows code pages') input_related.add_argument('-r', action='store_true', help='disable memory-mapping and use [r]ead calls instead') +input_related.add_argument('-g', metavar='BYTES', help='buffer size to request per call pair to XML_[G]etBuffer and read (default: 8 KiB)') input_related.add_argument('-k', action='store_true', help='when processing multiple files, [k]eep processing after first file with error') output_related = parser.add_argument_group('output control arguments')