mirror of
https://github.com/libexpat/libexpat.git
synced 2025-04-06 05:34:59 +00:00
xmltok: Add more in-code documentation about byte types
This commit is contained in:
parent
e3d6578214
commit
748ac8799d
2 changed files with 39 additions and 37 deletions
|
@ -587,11 +587,13 @@ static const struct normal_encoding ascii_encoding
|
|||
static int PTRFASTCALL
|
||||
unicode_byte_type(char hi, char lo) {
|
||||
switch ((unsigned char)hi) {
|
||||
/* 0xD800–0xDBFF first 16-bit code unit or high surrogate (W1) */
|
||||
case 0xD8:
|
||||
case 0xD9:
|
||||
case 0xDA:
|
||||
case 0xDB:
|
||||
return BT_LEAD4;
|
||||
/* 0xDC00–0xDFFF second 16-bit code unit or low surrogate (W2) */
|
||||
case 0xDC:
|
||||
case 0xDD:
|
||||
case 0xDE:
|
||||
|
@ -599,8 +601,8 @@ unicode_byte_type(char hi, char lo) {
|
|||
return BT_TRAIL;
|
||||
case 0xFF:
|
||||
switch ((unsigned char)lo) {
|
||||
case 0xFF:
|
||||
case 0xFE:
|
||||
case 0xFF: /* noncharacter-FFFF */
|
||||
case 0xFE: /* noncharacter-FFFE */
|
||||
return BT_NONXML;
|
||||
}
|
||||
break;
|
||||
|
|
|
@ -31,43 +31,43 @@
|
|||
*/
|
||||
|
||||
enum {
|
||||
BT_NONXML,
|
||||
BT_MALFORM,
|
||||
BT_LT,
|
||||
BT_AMP,
|
||||
BT_RSQB,
|
||||
BT_LEAD2,
|
||||
BT_LEAD3,
|
||||
BT_LEAD4,
|
||||
BT_TRAIL,
|
||||
BT_CR,
|
||||
BT_LF,
|
||||
BT_GT,
|
||||
BT_QUOT,
|
||||
BT_APOS,
|
||||
BT_EQUALS,
|
||||
BT_QUEST,
|
||||
BT_EXCL,
|
||||
BT_SOL,
|
||||
BT_SEMI,
|
||||
BT_NUM,
|
||||
BT_LSQB,
|
||||
BT_S,
|
||||
BT_NMSTRT,
|
||||
BT_COLON,
|
||||
BT_HEX,
|
||||
BT_DIGIT,
|
||||
BT_NAME,
|
||||
BT_MINUS,
|
||||
BT_NONXML, /* e.g. noncharacter-FFFF */
|
||||
BT_MALFORM, /* illegal, with regard to encoding */
|
||||
BT_LT, /* less than = "<" */
|
||||
BT_AMP, /* ampersand = "&" */
|
||||
BT_RSQB, /* right square bracket = "]" */
|
||||
BT_LEAD2, /* lead byte of a 2-byte UTF-8 character */
|
||||
BT_LEAD3, /* lead byte of a 3-byte UTF-8 character */
|
||||
BT_LEAD4, /* lead byte of a 4-byte UTF-8 character */
|
||||
BT_TRAIL, /* trailing unit, e.g. second 16-bit unit of a 4-byte char. */
|
||||
BT_CR, /* carriage return = "\r" */
|
||||
BT_LF, /* line feed = "\n" */
|
||||
BT_GT, /* greater than = ">" */
|
||||
BT_QUOT, /* quotation character = "\"" */
|
||||
BT_APOS, /* apostrophe = "'" */
|
||||
BT_EQUALS, /* equal sign = "=" */
|
||||
BT_QUEST, /* question mark = "?" */
|
||||
BT_EXCL, /* exclamation mark = "!" */
|
||||
BT_SOL, /* solidus, slash = "/" */
|
||||
BT_SEMI, /* semicolon = ";" */
|
||||
BT_NUM, /* number sign = "#" */
|
||||
BT_LSQB, /* left square bracket = "[" */
|
||||
BT_S, /* white space, e.g. "\t", " "[, "\r"] */
|
||||
BT_NMSTRT, /* non-hex name start letter = "G".."Z" + "g".."z" + "_" */
|
||||
BT_COLON, /* colon = ":" */
|
||||
BT_HEX, /* hex letter = "A".."F" + "a".."f" */
|
||||
BT_DIGIT, /* digit = "0".."9" */
|
||||
BT_NAME, /* dot and middle dot = "." + chr(0xb7) */
|
||||
BT_MINUS, /* minus = "-" */
|
||||
BT_OTHER, /* known not to be a name or name start character */
|
||||
BT_NONASCII, /* might be a name or name start character */
|
||||
BT_PERCNT,
|
||||
BT_LPAR,
|
||||
BT_RPAR,
|
||||
BT_AST,
|
||||
BT_PLUS,
|
||||
BT_COMMA,
|
||||
BT_VERBAR
|
||||
BT_PERCNT, /* percent sign = "%" */
|
||||
BT_LPAR, /* left parenthesis = "(" */
|
||||
BT_RPAR, /* right parenthesis = ")" */
|
||||
BT_AST, /* asterisk = "*" */
|
||||
BT_PLUS, /* plus sign = "+" */
|
||||
BT_COMMA, /* comma = "," */
|
||||
BT_VERBAR /* vertical bar = "|" */
|
||||
};
|
||||
|
||||
#include <stddef.h>
|
||||
|
|
Loading…
Add table
Reference in a new issue