Added child_value and child_value_w helpers, added more eol handling flags, optimized strconv_t, fixed warnings in MSVC (W4), some inner refactoring, fixed child_value for empty nodes

git-svn-id: http://pugixml.googlecode.com/svn/trunk@3 99668b35-9821-0410-8761-19e4c4f06640
This commit is contained in:
arseny.kapoulkine 2006-11-06 18:38:04 +00:00
parent 69cc3fcb3a
commit 9a5d7f62fd
2 changed files with 169 additions and 75 deletions

View file

@ -101,9 +101,7 @@ namespace pugi
}
namespace
{
using namespace pugi;
{
namespace utf8
{
const unsigned char BYTE_MASK = 0xBF;
@ -111,9 +109,12 @@ namespace
const unsigned char BYTE_MASK_READ = 0x3F;
const unsigned char FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
}
}
namespace pugi
{
// Get the size that is needed for strutf16_utf8 applied to all s characters
size_t strutf16_utf8_size(const wchar_t* s)
static size_t strutf16_utf8_size(const wchar_t* s)
{
size_t length = 0;
@ -133,7 +134,7 @@ namespace
// \param ch - char
// \return position after the last char
// \rem yes, this is from TinyXML. How would you write it the other way, without switch trick?..
char* strutf16_utf8(char* s, unsigned int ch)
static char* strutf16_utf8(char* s, unsigned int ch)
{
unsigned int length;
@ -165,7 +166,7 @@ namespace
}
// Get the size that is needed for strutf8_utf16 applied to all s characters
size_t strutf8_utf16_size(const char* s)
static size_t strutf8_utf16_size(const char* s)
{
size_t length = 0;
@ -181,7 +182,7 @@ namespace
// \param s - pointer to string
// \param ch - char
// \return position after the last char
const char* strutf8_utf16(const char* s, unsigned int& ch)
static const char* strutf8_utf16(const char* s, unsigned int& ch)
{
unsigned int length;
@ -248,14 +249,28 @@ namespace
static bool chartype_lbracket(char c) { return c == '['; }
static bool chartype_rbracket(char c) { return c == ']'; }
template <bool opt_trim, bool opt_escape, bool opt_wnorm, bool opt_wconv> static void strconv_t(char** s)
template <bool opt_trim, bool opt_escape, bool opt_wnorm, bool opt_wconv, bool opt_eol> static void strconv_t(char** s)
{
if (!s || !*s) return;
if (!opt_trim && !opt_escape && !opt_wnorm && !opt_wconv && !opt_eol) return;
// Trim whitespaces
if (opt_trim) while (chartype_space(**s)) ++(*s);
char* str = *s;
// Skip usual symbols
if (opt_escape || opt_wnorm || opt_wconv || opt_eol)
{
while (*str)
{
if (opt_wconv && *str == '&') break;
if ((opt_wnorm || opt_wconv || opt_eol) && chartype_space(*str)) break;
++str;
}
}
char* lastpos = str;
if (!*str) return;
@ -359,6 +374,14 @@ namespace
}
}
}
else if (chartype_space(*str) && opt_wnorm)
{
*lastpos++ = ' ';
while (chartype_space(*str)) ++str;
continue;
}
else if (chartype_space(*str) && opt_wconv)
{
if (*str == 0x0d && *(str + 1) == 0x0a) ++str;
@ -368,7 +391,7 @@ namespace
continue;
}
else if (*str == 0x0d && !opt_wnorm)
else if (*str == 0x0d && !opt_wnorm && opt_eol)
{
if (*(str + 1) == 0x0a) ++str;
++str;
@ -376,14 +399,6 @@ namespace
continue;
}
else if (chartype_space(*str) && opt_wnorm)
{
*lastpos++ = ' ';
while (chartype_space(*str)) ++str;
continue;
}
*lastpos++ = *str++;
}
@ -395,64 +410,128 @@ namespace
}
else *lastpos = 0;
}
static void strconv_setup(void (*&func)(char**), unsigned int opt_trim, unsigned int opt_escape, unsigned int opt_wnorm, unsigned int opt_wconv)
static void strconv_setup(void (*&func)(char**), unsigned int opt_trim, unsigned int opt_escape, unsigned int opt_wnorm, unsigned int opt_wconv, unsigned int opt_eol)
{
if (opt_wconv)
if (opt_eol)
{
if (opt_trim)
if (opt_wconv)
{
if (opt_escape)
if (opt_trim)
{
if (opt_wnorm) func = &strconv_t<true, true, true, true>;
else func = &strconv_t<true, true, false, true>;
if (opt_escape)
{
if (opt_wnorm) func = &strconv_t<true, true, true, true, true>;
else func = &strconv_t<true, true, false, true, true>;
}
else
{
if (opt_wnorm) func = &strconv_t<true, false, true, true, true>;
else func = &strconv_t<true, false, false, true, true>;
}
}
else
{
if (opt_wnorm) func = &strconv_t<true, false, true, true>;
else func = &strconv_t<true, false, false, true>;
if (opt_escape)
{
if (opt_wnorm) func = &strconv_t<false, true, true, true, true>;
else func = &strconv_t<false, true, false, true, true>;
}
else
{
if (opt_wnorm) func = &strconv_t<false, false, true, true, true>;
else func = &strconv_t<false, false, false, true, true>;
}
}
}
else
{
if (opt_escape)
if (opt_trim)
{
if (opt_wnorm) func = &strconv_t<false, true, true, true>;
else func = &strconv_t<false, true, false, true>;
if (opt_escape)
{
if (opt_wnorm) func = &strconv_t<true, true, true, false, true>;
else func = &strconv_t<true, true, false, false, true>;
}
else
{
if (opt_wnorm) func = &strconv_t<true, false, true, false, true>;
else func = &strconv_t<true, false, false, false, true>;
}
}
else
{
if (opt_wnorm) func = &strconv_t<false, false, true, true>;
else func = &strconv_t<false, false, false, true>;
if (opt_escape)
{
if (opt_wnorm) func = &strconv_t<false, true, true, false, true>;
else func = &strconv_t<false, true, false, false, true>;
}
else
{
if (opt_wnorm) func = &strconv_t<false, false, true, false, true>;
else func = &strconv_t<false, false, false, false, true>;
}
}
}
}
else
{
if (opt_trim)
if (opt_wconv)
{
if (opt_escape)
if (opt_trim)
{
if (opt_wnorm) func = &strconv_t<true, true, true, false>;
else func = &strconv_t<true, true, false, false>;
if (opt_escape)
{
if (opt_wnorm) func = &strconv_t<true, true, true, true, false>;
else func = &strconv_t<true, true, false, true, false>;
}
else
{
if (opt_wnorm) func = &strconv_t<true, false, true, true, false>;
else func = &strconv_t<true, false, false, true, false>;
}
}
else
{
if (opt_wnorm) func = &strconv_t<true, false, true, false>;
else func = &strconv_t<true, false, false, false>;
if (opt_escape)
{
if (opt_wnorm) func = &strconv_t<false, true, true, true, false>;
else func = &strconv_t<false, true, false, true, false>;
}
else
{
if (opt_wnorm) func = &strconv_t<false, false, true, true, false>;
else func = &strconv_t<false, false, false, true, false>;
}
}
}
else
{
if (opt_escape)
if (opt_trim)
{
if (opt_wnorm) func = &strconv_t<false, true, true, false>;
else func = &strconv_t<false, true, false, false>;
if (opt_escape)
{
if (opt_wnorm) func = &strconv_t<true, true, true, false, false>;
else func = &strconv_t<true, true, false, false, false>;
}
else
{
if (opt_wnorm) func = &strconv_t<true, false, true, false, false>;
else func = &strconv_t<true, false, false, false, false>;
}
}
else
{
if (opt_wnorm) func = &strconv_t<false, false, true, false>;
else func = &strconv_t<false, false, false, false>;
if (opt_escape)
{
if (opt_wnorm) func = &strconv_t<false, true, true, false, false>;
else func = &strconv_t<false, true, false, false, false>;
}
else
{
if (opt_wnorm) func = &strconv_t<false, false, true, false, false>;
else func = &strconv_t<false, false, false, false, false>;
}
}
}
}
@ -526,8 +605,8 @@ namespace
void (*strconv_pcdata)(char**);
void (*strconv_attribute)(char**);
strconv_setup(strconv_attribute, OPTSET(parse_trim_attribute), OPTSET(parse_escapes_attribute), OPTSET(parse_wnorm_attribute), OPTSET(parse_wconv_attribute));
strconv_setup(strconv_pcdata, OPTSET(parse_trim_pcdata), OPTSET(parse_escapes_pcdata), OPTSET(parse_wnorm_pcdata), false);
strconv_setup(strconv_attribute, OPTSET(parse_trim_attribute), OPTSET(parse_escapes_attribute), OPTSET(parse_wnorm_attribute), OPTSET(parse_wconv_attribute), OPTSET(parse_eol_attribute));
strconv_setup(strconv_pcdata, OPTSET(parse_trim_pcdata), OPTSET(parse_escapes_pcdata), OPTSET(parse_wnorm_pcdata), false, OPTSET(parse_eol_pcdata));
char ch = 0; // Current char, in cases where we must null-terminate before we test.
xml_node_struct* cursor = xmldoc; // Tree node cursor.
@ -622,7 +701,7 @@ namespace
if (OPTSET(parse_eol_cdata))
{
strconv_t<false, false, false, false>(&cursor->value);
strconv_t<false, false, false, false, true>(&cursor->value);
}
POPNODE(); // Pop since this is a standalone.
@ -860,10 +939,13 @@ namespace
}
return s;
}
private:
const xml_parser_impl& operator=(const xml_parser_impl&);
};
// Compare lhs with [rhs_begin, rhs_end)
int strcmprange(const char* lhs, const char* rhs_begin, const char* rhs_end)
static int strcmprange(const char* lhs, const char* rhs_begin, const char* rhs_end)
{
while (*lhs && rhs_begin != rhs_end && *lhs == *rhs_begin)
{
@ -876,7 +958,7 @@ namespace
}
// Character set pattern match.
int strcmpwild_cset(const char** src, const char** dst)
static int strcmpwild_cset(const char** src, const char** dst)
{
int find = 0, excl = 0, star = 0;
@ -909,22 +991,14 @@ namespace
return find;
}
}
namespace pugi
{
namespace impl
{
int strcmpwild(const char* src, const char* dst);
}
}
namespace
{
using namespace pugi;
// Wildcard pattern match.
int strcmpwild_astr(const char** src, const char** dst)
static int strcmpwild_astr(const char** src, const char** dst)
{
int find = 1;
++(*src);
@ -952,10 +1026,7 @@ namespace
return find;
}
}
}
namespace pugi
{
namespace impl
{
// Compare two strings, with globbing, and character sets.
@ -977,16 +1048,18 @@ namespace pugi
}
}
extern "C"
int strcmp(const char* lhs, const char* rhs)
{
int strcmpwildimpl(const char* src, const char* dst)
{
return impl::strcmpwild(src, dst);
}
typedef int (*strcmpfunc)(const char*, const char*);
return ::strcmp(lhs, rhs);
}
// Forwarding wrapper around impl::strcmpwild: passes src and dst through unchanged
// and returns whatever impl::strcmpwild returns.
// Its signature matches the strcmpfunc typedef declared right after it
// (presumably so it can be selected through such a pointer - confirm at the call sites).
int strcmpwildimpl(const char* src, const char* dst)
{
return impl::strcmpwild(src, dst);
}
typedef int (*strcmpfunc)(const char*, const char*);
xml_attribute_struct::xml_attribute_struct(): name(0), value(0), prev_attribute(0), next_attribute(0)
{
}
@ -1355,12 +1428,23 @@ namespace pugi
// Walks the children of _root (first_child, then next_sibling) and returns the value
// of the first child whose type is node_pcdata or node_cdata and whose value is non-null.
// Returns "" when no such child is found.
const char* xml_node::child_value() const
{
for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
if ((i->type == node_pcdata || i->type == node_cdata) && i->value)
return i->value;
// Only walk the children when empty() is false (presumably this protects against a null _root - confirm against empty()).
if (!empty())
for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
if ((i->type == node_pcdata || i->type == node_cdata) && i->value)
return i->value;
return "";
}
const char* xml_node::child_value(const char* name) const
{
return child(name).child_value();
}
const char* xml_node::child_value_w(const char* name) const
{
return child_w(name).child_value();
}
xml_attribute xml_node::first_attribute() const
{
return _root ? xml_attribute(_root->first_attribute) : xml_attribute();

View file

@ -46,10 +46,13 @@ namespace pugi
const unsigned int parse_wnorm_pcdata = 0x00000200; ///< Normalize spaces in pcdata
const unsigned int parse_wnorm_attribute = 0x00000400; ///< Normalize spaces in attributes
const unsigned int parse_wconv_attribute = 0x00000800; ///< Convert space-like characters to spaces in attributes (only if wnorm is not set)
const unsigned int parse_eol_cdata = 0x00001000; ///< Perform EOL handling in CDATA sections
const unsigned int parse_check_end_tags = 0x00002000; ///< Check start and end tag names and return error if names mismatch
const unsigned int parse_match_end_tags = 0x00004000; ///< Try to find corresponding start tag for an end tag
const unsigned int parse_default = 0x0000FFFF & ~parse_ws_pcdata; ///< Set all flags, except parse_ws_pcdata
const unsigned int parse_eol_pcdata = 0x00001000; ///< Perform EOL handling in pcdata
const unsigned int parse_eol_attribute = 0x00002000; ///< Perform EOL handling in attributes
const unsigned int parse_eol_cdata = 0x00004000; ///< Perform EOL handling in CDATA sections
const unsigned int parse_check_end_tags = 0x00010000; ///< Check start and end tag names and return error if names mismatch
const unsigned int parse_match_end_tags = 0x00020000; ///< Try to find corresponding start tag for an end tag
/// Set all flags, except parse_ws_pcdata and parse_trim_attribute
/// (the mask 0x00FFFFFF covers the low 24 bits, with those two flag bits cleared).
const unsigned int parse_default = 0x00FFFFFF & ~parse_ws_pcdata & ~parse_trim_attribute;
const unsigned int parse_noset = 0x80000000; ///< Parse with flags in xml_parser
const unsigned int parse_w3c = parse_pi | parse_comments | parse_cdata |
@ -266,6 +269,13 @@ namespace pugi
/// Return PCDATA/CDATA that is child of current node. If none, return empty string.
const char* child_value() const;
/// Return PCDATA/CDATA that is child of specified child node. If none, return empty string.
const char* child_value(const char* name) const;
/// Return PCDATA/CDATA that is child of specified child node. If none, return empty string.
/// Enable wildcard matching.
const char* child_value_w(const char* name) const;
public:
/// Access node's first attribute if any, else xml_attribute()
xml_attribute first_attribute() const;
@ -472,7 +482,7 @@ namespace pugi
std::vector<char> _buffer; ///< character buffer
xml_memory_block _memory; ///< Memory block
xml_node_struct* _xmldoc; ///< Pointer to current XML document tree root.
unsigned int _optmsk; ///< Parser options.