Merge branch 'coverage'

This commit is contained in:
Sebastian Pipping 2017-07-22 23:34:06 +02:00
commit 611bf03530
7 changed files with 7109 additions and 255 deletions

3
expat/.gitignore vendored
View file

@ -28,3 +28,6 @@ expat.pc
/callgraph.svg
/libexpat.so.*
/run.sh
build__R*
coverage__R*
source__R*

3
expat/clean_coverage.sh Executable file
View file

@ -0,0 +1,3 @@
rm -r build__*
rm -r coverage__*
rm -r source__*

View file

@ -1870,9 +1870,22 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
if (errorCode == XML_ERROR_NONE) {
switch (ps_parsing) {
case XML_SUSPENDED:
/* It is hard to be certain, but it seems that this case
* cannot occur. This code is cleaning up a previous parse
* with no new data (since len == 0). Changing the parsing
* state requires getting to execute a handler function, and
* there doesn't seem to be an opportunity for that while in
* this circumstance.
*
* Given the uncertainty, we retain the code but exclude it
* from coverage tests.
*
* LCOV_EXCL_START
*/
XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
positionPtr = bufferPtr;
return XML_STATUS_SUSPENDED;
/* LCOV_EXCL_STOP */
case XML_INITIALIZED:
case XML_PARSING:
ps_parsing = XML_FINISHED;
@ -3061,9 +3074,17 @@ doContent(XML_Parser parser,
return XML_ERROR_NO_MEMORY;
break;
default:
/* All of the tokens produced by XmlContentTok() have their own
* explicit cases, so this default is not strictly necessary.
* However it is a useful safety net, so we retain the code and
* simply exclude it from the coverage tests.
*
* LCOV_EXCL_START
*/
if (defaultHandler)
reportDefault(parser, enc, s, next);
break;
/* LCOV_EXCL_STOP */
}
*eventPP = s = next;
switch (ps_parsing) {
@ -3342,8 +3363,23 @@ storeAtts(XML_Parser parser, const ENCODING *enc,
((XML_Char *)s)[-1] = 0; /* clear flag */
id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
if (!id || !id->prefix)
return XML_ERROR_NO_MEMORY;
if (!id || !id->prefix) {
/* This code is walking through the appAtts array, dealing
* with (in this case) a prefixed attribute name. To be in
* the array, the attribute must have already been bound, so
* has to have passed through the hash table lookup once
* already. That implies that an entry for it already
* exists, so the lookup above will return a pointer to
* already allocated memory. There is no opportunity for
* the allocator to fail, so the condition above cannot be
* fulfilled.
*
* Since it is difficult to be certain that the above
* analysis is complete, we retain the test and merely
* remove the code from coverage tests.
*/
return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
}
b = id->prefix->binding;
if (!b)
return XML_ERROR_UNBOUND_PREFIX;
@ -3720,8 +3756,16 @@ doCdataSection(XML_Parser parser,
}
return XML_ERROR_UNCLOSED_CDATA_SECTION;
default:
/* Every token returned by XmlCdataSectionTok() has its own
* explicit case, so this default case will never be executed.
* We retain it as a safety net and exclude it from the coverage
* statistics.
*
* LCOV_EXCL_START
*/
*eventPP = next;
return XML_ERROR_UNEXPECTED_STATE;
/* LCOV_EXCL_STOP */
}
*eventPP = s = next;
@ -3781,8 +3825,20 @@ doIgnoreSection(XML_Parser parser,
eventEndPP = &eventEndPtr;
}
else {
/* It's not entirely clear, but it seems the following two lines
* of code cannot be executed. The only occasions on which 'enc'
* is not 'parser->m_encoding' are when this function is called
* from the internal entity processing, and IGNORE sections are an
* error in internal entities.
*
* Since it really isn't clear that this is true, we keep the code
* and just remove it from our coverage tests.
*
* LCOV_EXCL_START
*/
eventPP = &(openInternalEntities->internalEventPtr);
eventEndPP = &(openInternalEntities->internalEventEndPtr);
/* LCOV_EXCL_STOP */
}
*eventPP = s;
*startPtr = NULL;
@ -3815,8 +3871,16 @@ doIgnoreSection(XML_Parser parser,
}
return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
default:
/* All of the tokens that XmlIgnoreSectionTok() returns have
* explicit cases to handle them, so this default case is never
* executed. We keep it as a safety net anyway, and remove it
* from our test coverage statistics.
*
* LCOV_EXCL_START
*/
*eventPP = next;
return XML_ERROR_UNEXPECTED_STATE;
/* LCOV_EXCL_STOP */
}
/* not reached */
}
@ -4058,15 +4122,14 @@ entityValueInitProcessor(XML_Parser parser,
result = processXmlDecl(parser, 0, start, next);
if (result != XML_ERROR_NONE)
return result;
switch (ps_parsing) {
case XML_SUSPENDED:
*nextPtr = next;
return XML_ERROR_NONE;
case XML_FINISHED:
/* At this point, ps_parsing cannot be XML_SUSPENDED. For that
* to happen, a parameter entity parsing handler must have
* attempted to suspend the parser, which fails and raises an
* error. The parser can be aborted, but can't be suspended.
*/
if (ps_parsing == XML_FINISHED)
return XML_ERROR_ABORTED;
default:
*nextPtr = next;
}
*nextPtr = next;
/* stop scanning for text declaration - we found one */
processor = entityValueProcessor;
return entityValueProcessor(parser, next, end, nextPtr);
@ -4389,8 +4452,14 @@ doProlog(XML_Parser parser,
&dtd->paramEntities,
externalSubsetName,
sizeof(ENTITY));
if (!entity)
return XML_ERROR_NO_MEMORY;
if (!entity) {
/* The external subset name "#" will have already been
* inserted into the hash table at the start of the
* external entity parsing, so no allocation will happen
* and lookup() cannot fail.
*/
return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
}
if (useForeignDTD)
entity->base = curBase;
dtd->paramEntityRead = XML_FALSE;
@ -4950,8 +5019,29 @@ doProlog(XML_Parser parser,
: !dtd->hasParamEntityRefs)) {
if (!entity)
return XML_ERROR_UNDEFINED_ENTITY;
else if (!entity->is_internal)
return XML_ERROR_ENTITY_DECLARED_IN_PE;
else if (!entity->is_internal) {
/* It's hard to exhaustively search the code to be sure,
* but there doesn't seem to be a way of executing the
* following line. There are two cases:
*
* If 'standalone' is false, the DTD must have no
* parameter entities or we wouldn't have passed the outer
* 'if' statement. That means the only entity in the hash
* table is the external subset name "#" which cannot be
* given as a parameter entity name in XML syntax, so the
* lookup must have returned NULL and we don't even reach
* the test for an internal entity.
*
* If 'standalone' is true, it does not seem to be
* possible to create entities taking this code path that
* are not internal entities, so the test above is never satisfied.
*
* Because this analysis is very uncertain, the code is
* being left in place and merely removed from the
* coverage test statistics.
*/
return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
}
}
else if (!entity) {
dtd->keepProcessing = dtd->standalone;
@ -5423,11 +5513,15 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
&& (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
break;
n = XmlEncode(n, (ICHAR *)buf);
if (!n) {
if (enc == encoding)
eventPtr = ptr;
return XML_ERROR_BAD_CHAR_REF;
}
/* The XmlEncode() functions can never return 0 here. That
* error return happens if the code point passed in is either
* negative or greater than or equal to 0x110000. The
* XmlCharRefNumber() functions will all return a number
* strictly less than 0x110000 or a negative value if an error
* occurred. The negative value is intercepted above, so
* XmlEncode() is never passed a value it might return an
* error for.
*/
for (i = 0; i < n; i++) {
if (!poolAppendChar(pool, buf[i]))
return XML_ERROR_NO_MEMORY;
@ -5501,8 +5595,26 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
break;
}
if (entity->open) {
if (enc == encoding)
eventPtr = ptr;
if (enc == encoding) {
/* It does not appear that this line can be executed.
*
* The "if (entity->open)" check catches recursive entity
* definitions. In order to be called with an open
* entity, it must have gone through this code before and
* been through the recursive call to
* appendAttributeValue() some lines below. That call
* sets the local encoding ("enc") to the parser's
* internal encoding (internal_utf8 or internal_utf16),
* which can never be the same as the principal encoding.
* It doesn't appear there is another code path that gets
* here with entity->open being TRUE.
*
* Since it is not certain that this logic is watertight,
* we keep the line and merely exclude it from coverage
* tests.
*/
eventPtr = ptr; /* LCOV_EXCL_LINE */
}
return XML_ERROR_RECURSIVE_ENTITY_REF;
}
if (entity->notation) {
@ -5529,9 +5641,21 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
}
break;
default:
/* The only token returned by XmlAttributeValueTok() that does
* not have an explicit case here is XML_TOK_PARTIAL_CHAR.
* Getting that would require an entity name to contain an
* incomplete XML character (e.g. \xE2\x82); however previous
* tokenisers will have already recognised and rejected such
* names before XmlAttributeValueTok() gets a look-in. This
* default case should be retained as a safety net, but the code
* excluded from coverage tests.
*
* LCOV_EXCL_START
*/
if (enc == encoding)
eventPtr = ptr;
return XML_ERROR_UNEXPECTED_STATE;
/* LCOV_EXCL_STOP */
}
ptr = next;
}
@ -5664,12 +5788,15 @@ storeEntityValue(XML_Parser parser,
goto endEntityValue;
}
n = XmlEncode(n, (ICHAR *)buf);
if (!n) {
if (enc == encoding)
eventPtr = entityTextPtr;
result = XML_ERROR_BAD_CHAR_REF;
goto endEntityValue;
}
/* The XmlEncode() functions can never return 0 here. That
* error return happens if the code point passed in is either
* negative or greater than or equal to 0x110000. The
* XmlCharRefNumber() functions will all return a number
* strictly less than 0x110000 or a negative value if an error
* occurred. The negative value is intercepted above, so
* XmlEncode() is never passed a value it might return an
* error for.
*/
for (i = 0; i < n; i++) {
if (pool->end == pool->ptr && !poolGrow(pool)) {
result = XML_ERROR_NO_MEMORY;
@ -5690,10 +5817,18 @@ storeEntityValue(XML_Parser parser,
result = XML_ERROR_INVALID_TOKEN;
goto endEntityValue;
default:
/* This default case should be unnecessary -- all the tokens
* that XmlEntityValueTok() can return have their own explicit
* cases -- but should be retained for safety. We do however
* exclude it from the coverage statistics.
*
* LCOV_EXCL_START
*/
if (enc == encoding)
eventPtr = entityTextPtr;
result = XML_ERROR_UNEXPECTED_STATE;
goto endEntityValue;
/* LCOV_EXCL_STOP */
}
entityTextPtr = next;
}
@ -5791,8 +5926,25 @@ reportDefault(XML_Parser parser, const ENCODING *enc,
eventEndPP = &eventEndPtr;
}
else {
/* To get here, two things must be true; the parser must be
* using a character encoding that is not the same as the
* encoding passed in, and the encoding passed in must need
* conversion to the internal format (UTF-8 unless XML_UNICODE
* is defined). The only occasions on which the encoding passed
* in is not the same as the parser's encoding are when it is
* the internal encoding (e.g. a previously defined parameter
* entity, already converted to internal format). This by
* definition doesn't need conversion, so the whole branch never
* gets executed.
*
* For safety's sake we don't delete these lines and merely
* exclude them from coverage statistics.
*
* LCOV_EXCL_START
*/
eventPP = &(openInternalEntities->internalEventPtr);
eventEndPP = &(openInternalEntities->internalEventEndPtr);
/* LCOV_EXCL_STOP */
}
do {
ICHAR *dataPtr = (ICHAR *)dataBuf;
@ -5961,9 +6113,30 @@ getContext(XML_Parser parser)
len = dtd->defaultPrefix.binding->uriLen;
if (namespaceSeparator)
len--;
for (i = 0; i < len; i++)
if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i]))
return NULL;
for (i = 0; i < len; i++) {
if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i])) {
/* Because of memory caching, I don't believe this line can be
* executed.
*
* This is part of a loop copying the default prefix binding
* URI into the parser's temporary string pool. Previously,
* that URI was copied into the same string pool, with a
* terminating NUL character, as part of setContext(). When
* the pool was cleared, that leaves a block definitely big
* enough to hold the URI on the free block list of the pool.
* The URI copy in getContext() therefore cannot run out of
* memory.
*
* If the pool is used between the setContext() and
* getContext() calls, the worst it can do is leave a bigger
* block on the front of the free list. Given that this is
* all somewhat inobvious and program logic can be changed, we
* don't delete the line but we do exclude it from the test
* coverage statistics.
*/
return NULL; /* LCOV_EXCL_LINE */
}
}
needSep = XML_TRUE;
}
@ -5975,8 +6148,15 @@ getContext(XML_Parser parser)
PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
if (!prefix)
break;
if (!prefix->binding)
continue;
if (!prefix->binding) {
/* This test appears to be (justifiable) paranoia. There does
* not seem to be a way of injecting a prefix without a binding
* that doesn't get errored long before this function is called.
* The test should remain for safety's sake, so we instead
* exclude the following line from the coverage statistics.
*/
continue; /* LCOV_EXCL_LINE */
}
if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
return NULL;
for (s = prefix->name; *s; s++)
@ -6647,8 +6827,20 @@ poolCopyString(STRING_POOL *pool, const XML_Char *s)
static const XML_Char *
poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
{
if (!pool->ptr && !poolGrow(pool))
return NULL;
if (!pool->ptr && !poolGrow(pool)) {
/* The following line is unreachable given the current usage of
* poolCopyStringN(). Currently it is called from exactly one
* place to copy the text of a simple general entity. By that
* point, the name of the entity is already stored in the pool, so
* pool->ptr cannot be NULL.
*
* If poolCopyStringN() is used elsewhere as it well might be,
* this line may well become executable again. Regardless, this
* sort of check shouldn't be removed lightly, so we just exclude
* it from the coverage statistics.
*/
return NULL; /* LCOV_EXCL_LINE */
}
for (; n > 0; --n, s++) {
if (!poolAppendChar(pool, *s))
return NULL;
@ -6745,8 +6937,15 @@ poolGrow(STRING_POOL *pool)
// to avoid dangling pointers:
const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;
if (blockSize < 0)
return XML_FALSE;
if (blockSize < 0) {
/* This condition traps a situation where more than
* INT_MAX/2 bytes have already been allocated. This isn't
* readily testable, since it is unlikely that an average
* machine will have that much memory, so we exclude it from the
* coverage statistics.
*/
return XML_FALSE; /* LCOV_EXCL_LINE */
}
bytesToAllocate = poolBytesToAllocateFor(blockSize);
if (bytesToAllocate == 0)
@ -6767,8 +6966,18 @@ poolGrow(STRING_POOL *pool)
int blockSize = (int)(pool->end - pool->start);
size_t bytesToAllocate;
if (blockSize < 0)
return XML_FALSE;
if (blockSize < 0) {
/* This condition traps a situation where either more than
* INT_MAX bytes have already been allocated (which is prevented
* by various pieces of program logic, not least this one, never
* mind the unlikelihood of actually having that much memory) or
* the pool control fields have been corrupted (which could
* conceivably happen in an extremely buggy user handler
* function). Either way it isn't readily testable, so we
* exclude it from the coverage statistics.
*/
return XML_FALSE; /* LCOV_EXCL_LINE */
}
if (blockSize < INIT_BLOCK_SIZE)
blockSize = INIT_BLOCK_SIZE;

View file

@ -170,7 +170,14 @@ prolog1(PROLOG_STATE *state,
case XML_TOK_COMMENT:
return XML_ROLE_COMMENT;
case XML_TOK_BOM:
return XML_ROLE_NONE;
/* This case can never arise. To reach this role function, the
* parse must have passed through prolog0 and therefore have had
* some form of input, even if only a space. At that point, a
* byte order mark is no longer a valid character (though
* technically it should be interpreted as a non-breaking space),
* so will be rejected by the tokenizing stages.
*/
return XML_ROLE_NONE; /* LCOV_EXCL_LINE */
case XML_TOK_DECL_OPEN:
if (!XmlNameMatchesAscii(enc,
ptr + 2 * MIN_BYTES_PER_CHAR(enc),
@ -1285,6 +1292,26 @@ declClose(PROLOG_STATE *state,
return common(state, tok);
}
/* This function will only be invoked if the internal logic of the
* parser has broken down. It is used in two cases:
*
* 1: When the XML prolog has been finished. At this point the
* processor (the parser level above these role handlers) should
* switch from prologProcessor to contentProcessor and reinitialise
* the handler function.
*
* 2: When an error has been detected (via common() below). At this
* point again the processor should be switched to errorProcessor,
* which will never call a handler.
*
* The result of this is that error() can only be called if the
* processor switch failed to happen, which is an internal error and
* therefore we shouldn't be able to provoke it simply by using the
* library. It is a necessary backstop, however, so we merely exclude
* it from the coverage statistics.
*
* LCOV_EXCL_START
*/
static int PTRCALL
error(PROLOG_STATE *UNUSED_P(state),
int UNUSED_P(tok),
@ -1294,6 +1321,7 @@ error(PROLOG_STATE *UNUSED_P(state),
{
return XML_ROLE_NONE;
}
/* LCOV_EXCL_STOP */
static int FASTCALL
common(PROLOG_STATE *state, int tok)

View file

@ -1019,7 +1019,11 @@ streqci(const char *s1, const char *s2)
if (ASCII_a <= c1 && c1 <= ASCII_z)
c1 += ASCII_A - ASCII_a;
if (ASCII_a <= c2 && c2 <= ASCII_z)
c2 += ASCII_A - ASCII_a;
/* The following line will never get executed. streqci() is
* only called from two places, both of which guarantee to put
* upper-case strings into s2.
*/
c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */
if (c1 != c2)
return 0;
if (!c1)
@ -1291,7 +1295,7 @@ XmlUtf8Encode(int c, char *buf)
};
if (c < 0)
return 0;
return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */
if (c < min2) {
buf[0] = (char)(c | UTF8_cval1);
return 1;
@ -1314,7 +1318,7 @@ XmlUtf8Encode(int c, char *buf)
buf[3] = (char)((c & 0x3f) | 0x80);
return 4;
}
return 0;
return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */
}
int FASTCALL
@ -1465,6 +1469,9 @@ XmlInitUnknownEncoding(void *mem,
else if (c < 0) {
if (c < -4)
return 0;
/* Multi-byte sequences need a converter function */
if (!convert)
return 0;
e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
e->utf8[i][0] = 0;
e->utf16[i] = 0;

View file

@ -1198,8 +1198,14 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
const char *start;
if (ptr >= end)
return XML_TOK_NONE;
else if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_PARTIAL;
else if (! HAS_CHAR(enc, ptr, end)) {
/* This line cannot be executed. The incoming data has already
* been tokenized once, so incomplete characters like this have
* already been eliminated from the input. Retaining the paranoia
* check is still valuable, however.
*/
return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
}
start = ptr;
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
@ -1258,8 +1264,14 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
const char *start;
if (ptr >= end)
return XML_TOK_NONE;
else if (! HAS_CHAR(enc, ptr, end))
return XML_TOK_PARTIAL;
else if (! HAS_CHAR(enc, ptr, end)) {
/* This line cannot be executed. The incoming data has already
* been tokenized once, so incomplete characters like this have
* already been eliminated from the input. Retaining the paranoia
* check is still valuable, however.
*/
return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
}
start = ptr;
while (HAS_CHAR(enc, ptr, end)) {
switch (BYTE_TYPE(enc, ptr)) {
@ -1614,6 +1626,14 @@ PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
return 0;
}
/* This function does not appear to be called from anywhere within the
* library code. It is used via the macro XmlSameName(), which is
* defined but never used. Since it appears in the encoding function
* table, removing it is not a thing to be undertaken lightly. For
* the moment, we simply exclude it from coverage tests.
*
* LCOV_EXCL_START
*/
static int PTRCALL
PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
{
@ -1677,14 +1697,21 @@ PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
}
/* not reached */
}
/* LCOV_EXCL_STOP */
static int PTRCALL
PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
const char *end1, const char *ptr2)
{
for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
if (end1 - ptr1 < MINBPC(enc))
return 0;
if (end1 - ptr1 < MINBPC(enc)) {
/* This line cannot be executed. The incoming data has already
* been tokenized once, so incomplete characters like this have
* already been eliminated from the input. Retaining the
* paranoia check is still valuable, however.
*/
return 0; /* LCOV_EXCL_LINE */
}
if (!CHAR_MATCHES(enc, ptr1, *ptr2))
return 0;
}

File diff suppressed because it is too large Load diff