ICU-3262 make UnicodeSet constructor and applyPattern() taking a ParsePosition, options bit mask, and SymbolTable pointer public

X-SVN-Rev: 13417
This commit is contained in:
Alan Liu 2003-10-14 21:47:59 +00:00
parent 55af1fed15
commit c91c33fbdf
12 changed files with 155 additions and 161 deletions

View file

@ -2314,7 +2314,50 @@ SOURCE=.\ruleiter.h
# End Source File
# Begin Source File
SOURCE=.\symtable.h
SOURCE=.\unicode\symtable.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\unicode\symtable.h
"..\..\include\unicode\symtable.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\symtable.h
"..\..\include\unicode\symtable.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win64 Release"
# Begin Custom Build
InputPath=.\unicode\symtable.h
"..\..\include\unicode\symtable.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win64 Debug"
# Begin Custom Build
InputPath=.\unicode\symtable.h
"..\..\include\unicode\symtable.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ENDIF
# End Source File
# Begin Source File

View file

@ -19,7 +19,7 @@
#include "unicode/parseerr.h"
#include "uhash.h"
#include "uvector.h"
#include "symtable.h" // For UnicodeSet parsing, is the interface that
#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
// looks up references to $variables within a set.

View file

@ -1092,8 +1092,8 @@ void RBBIRuleScanner::scanSet() {
pos.setIndex(fScanIndex);
startPos = fScanIndex;
UErrorCode localStatus = U_ZERO_ERROR;
uset = new UnicodeSet(fRB->fRules, pos,
*fSymbolTable,
uset = new UnicodeSet(fRB->fRules, pos, USET_IGNORE_SPACE,
fSymbolTable,
localStatus);
if (U_FAILURE(localStatus)) {
// TODO: Get more accurate position of the error from UnicodeSet's return info.

View file

@ -18,7 +18,7 @@
#include "unicode/parseerr.h"
#include "uhash.h"
#include "uvector.h"
#include "symtable.h" // For UnicodeSet parsing, is the interface that
#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
// looks up references to $variables within a set.
#include "rbbinode.h"
//#include "rbbitblb.h"

View file

@ -11,7 +11,7 @@
#include "ruleiter.h"
#include "unicode/parsepos.h"
#include "unicode/unistr.h"
#include "symtable.h" // TODO => unicode/symtable.h
#include "unicode/symtable.h"
#include "uprops.h"
U_NAMESPACE_BEGIN

View file

@ -335,6 +335,25 @@ public:
*/
UnicodeSet(const UnicodeString& pattern,
uint32_t options,
const SymbolTable* symbols,
UErrorCode& status);
/**
* Constructs a set from the given pattern. See the class description
* for the syntax of the pattern language.
* @param pattern a string specifying what characters are in the set
* @param pos on input, the position in pattern at which to start parsing.
* On output, the position after the last character parsed.
* @param options bitmask for options to apply to the pattern.
* Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
* @param symbols a symbol table mapping variable names to values
* and stand-in characters to UnicodeSets; may be NULL
* @param status input-output error code
* @draft ICU 2.8
*/
UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
uint32_t options,
const SymbolTable* symbols,
UErrorCode& status);
#ifdef U_USE_UNICODESET_DEPRECATES
@ -432,6 +451,7 @@ public:
* @param pattern a string specifying what characters are in the set
* @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
* contains a syntax error.
* @return a reference to this
* @stable ICU 2.0
*/
virtual UnicodeSet& applyPattern(const UnicodeString& pattern,
@ -444,12 +464,49 @@ public:
* @param pattern a string specifying what characters are in the set
* @param options bitmask for options to apply to the pattern.
* Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
* @param symbols a symbol table mapping variable names to
* values and stand-ins to UnicodeSets; may be NULL
* @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
* contains a syntax error.
* @return a reference to this
* @internal
*/
UnicodeSet& applyPattern(const UnicodeString& pattern,
uint32_t options,
const SymbolTable* symbols,
UErrorCode& status);
/**
* Parses the given pattern, starting at the given position. The
* character at pattern.charAt(pos.getIndex()) must be '[', or the
* parse fails. Parsing continues until the corresponding closing
* ']'. If a syntax error is encountered between the opening and
* closing bracket, the parse fails. Upon return from a successful
* parse, the ParsePosition is updated to point to the character
* following the closing ']', and this set has been modified to
* represent the parsed pattern. This method calls
* itself recursively to parse embedded subpatterns.
*
* @param pattern the string containing the pattern to be parsed.
* The portion of the string from pos.getIndex(), which must be a
* '[', to the corresponding closing ']', is parsed.
* @param pos upon entry, the position at which to begin parsing.
* The character at pattern.charAt(pos.getIndex()) must be a '['.
* Upon return from a successful parse, pos.getIndex() is either
* the character after the closing ']' of the parsed pattern, or
* pattern.length() if the closing ']' is the last character of
* the pattern string.
* @param options bitmask for options to apply to the pattern.
* Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
* @param symbols a symbol table mapping variable names to
* values and stand-ins to UnicodeSets; may be NULL
* @param status input-output error code. If it already indicates a
* failure on entry, the method returns without parsing. It is set
* to a failure code if the parse fails.
* @return a reference to this
* @draft ICU 2.8
*/
UnicodeSet& applyPattern(const UnicodeString& pattern,
ParsePosition& pos,
uint32_t options,
const SymbolTable* symbols,
UErrorCode& status);
/**
@ -1117,40 +1174,6 @@ private:
// RuleBasedTransliterator support
//----------------------------------------------------------------
public:
/**
* Constructs a set from the given pattern. See the class description
* for the syntax of the pattern language.
* @param pattern a string specifying what characters are in the set
* @param pos on input, the position in pattern at which to start parsing.
* On output, the position after the last character parsed.
* @param varNameToChar a mapping from variable names (String) to characters
* (Character). May be null. If varCharToSet is non-null, then names may
* map to either single characters or sets, depending on whether a mapping
* exists in varCharToSet. If varCharToSet is null then all names map to
* single characters.
* @param varCharToSet a mapping from characters (Character objects from
* varNameToChar) to UnicodeSet objects. May be null. Is only used if
* varNameToChar is also non-null.
* @exception <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
* contains a syntax error.
* @draft ICU 2.8
*/
UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
const SymbolTable& symbols,
UErrorCode& status);
/**
* Constructs a set from the given pattern. Identical to the
* 4-parameter ParsePosition constructor, but does not take a
* SymbolTable, and does not recognize embedded variables.
* @draft ICU 2.8
*/
UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
uint32_t options, UErrorCode& status);
private:
/**
@ -1166,40 +1189,10 @@ private:
// Implementation: Pattern parsing
//----------------------------------------------------------------
/**
* Parses the given pattern, starting at the given position. The
* character at pattern.charAt(pos.getIndex()) must be '[', or the
* parse fails. Parsing continues until the corresponding closing
* ']'. If a syntax error is encountered between the opening and
* closing brace, the parse fails. Upon return from a successful
* parse, the ParsePosition is updated to point to the character
* following the closing ']', and a StringBuffer containing a
* pairs list for the parsed pattern is returned. This method calls
* itself recursively to parse embedded subpatterns.
*
* @param pattern the string containing the pattern to be parsed.
* The portion of the string from pos.getIndex(), which must be a
* '[', to the corresponding closing ']', is parsed.
* @param pos upon entry, the position at which to begin parsing.
* The character at pattern.charAt(pos.getIndex()) must be a '['.
* Upon return from a successful parse, pos.getIndex() is either
* the character after the closing ']' of the parsed pattern, or
* pattern.length() if the closing ']' is the last character of
* the pattern string.
* @return a StringBuffer containing a pairs list for the parsed
* substring of <code>pattern</code>
* @exception U_ILLEGAL_ARGUMENT_ERROR if the parse fails.
*/
void applyPattern(const UnicodeString& pattern,
ParsePosition& pos,
uint32_t options,
const SymbolTable* symbols,
UErrorCode& status);
void applyPattern(RuleCharacterIterator& chars,
const SymbolTable* symbols,
UnicodeString& rebuiltPat,
int32_t options,
uint32_t options,
UErrorCode& ec);
//----------------------------------------------------------------

View file

@ -12,7 +12,7 @@
#include "unicode/parsepos.h"
#include "unicode/uchar.h"
#include "unicode/uscript.h"
#include "symtable.h" // TODO => unicode/symtable.h
#include "unicode/symtable.h"
#include "ruleiter.h"
#include "cmemory.h"
#include "uhash.h"
@ -325,7 +325,7 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern,
status = U_MEMORY_ALLOCATION_ERROR;
}else{
allocateStrings();
applyPattern(pattern, USET_IGNORE_SPACE, status);
applyPattern(pattern, USET_IGNORE_SPACE, NULL, status);
}
}
_dbgct(this);
@ -341,6 +341,7 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern,
*/
UnicodeSet::UnicodeSet(const UnicodeString& pattern,
uint32_t options,
const SymbolTable* symbols,
UErrorCode& status) :
len(0), capacity(START_EXTRA), bufferCapacity(0),
list(0), buffer(0), strings(0)
@ -352,15 +353,15 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern,
status = U_MEMORY_ALLOCATION_ERROR;
}else{
allocateStrings();
applyPattern(pattern, options, status);
applyPattern(pattern, options, symbols, status);
}
}
_dbgct(this);
}
// For internal use by RuleBasedTransliterator
UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
const SymbolTable& symbols,
uint32_t options,
const SymbolTable* symbols,
UErrorCode& status) :
len(0), capacity(START_EXTRA), bufferCapacity(0),
list(0), buffer(0), strings(0)
@ -372,26 +373,7 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
status = U_MEMORY_ALLOCATION_ERROR;
}else{
allocateStrings();
applyPattern(pattern, pos, USET_IGNORE_SPACE, &symbols, status);
}
}
_dbgct(this);
}
// For internal use by TransliteratorIDParser
UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
uint32_t options, UErrorCode& status) :
len(0), capacity(START_EXTRA), bufferCapacity(0),
list(0), buffer(0), strings(0)
{
if(U_SUCCESS(status)){
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
/* test for NULL */
if(list == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
}else{
allocateStrings();
applyPattern(pattern, pos, options, NULL, status);
applyPattern(pattern, pos, options, symbols, status);
}
}
_dbgct(this);
@ -549,9 +531,10 @@ UnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) {
*/
UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
UErrorCode& status) {
return applyPattern(pattern, USET_IGNORE_SPACE, status);
return applyPattern(pattern, USET_IGNORE_SPACE, NULL, status);
}
/**
* Modifies this set to represent the set specified by the given
* pattern, optionally ignoring white space. See the class
@ -562,31 +545,52 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
*/
UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
uint32_t options,
const SymbolTable* symbols,
UErrorCode& status) {
if (U_FAILURE(status)) {
return *this;
}
ParsePosition pos(0);
applyPattern(pattern, pos, options, NULL, status);
applyPattern(pattern, pos, options, symbols, status);
if (U_FAILURE(status)) return *this;
int32_t i = pos.getIndex();
int32_t n = pattern.length();
if (options & USET_IGNORE_SPACE) {
// Skip over trailing whitespace
while (i<n && uprv_isRuleWhiteSpace(pattern.charAt(i))) {
++i;
}
ICU_Utility::skipWhitespace(pattern, i, TRUE);
}
if (i != n) {
if (i != pattern.length()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
}
return *this;
}
/**
 * Parses a set pattern out of <code>pattern</code>, beginning at
 * <code>pos</code>, and applies it to this set.
 *
 * @param pattern the string containing the pattern to be parsed
 * @param pos     the parse position handed to the rule character
 *                iterator that walks the pattern
 * @param options bitmask of options forwarded to the parser
 * @param symbols symbol table used to resolve variables; may be NULL
 * @param status  input-output error code. Nothing is done if it
 *                already indicates a failure. Set to U_MALFORMED_SET
 *                if parsing ends while still inside a variable value.
 * @return a reference to this
 */
UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
                                     ParsePosition& pos,
                                     uint32_t options,
                                     const SymbolTable* symbols,
                                     UErrorCode& status) {
    if (U_SUCCESS(status)) {
        // The iterator-based applyPattern() goes through add() etc.,
        // which set pat to empty, so the pattern text is collected in
        // a scratch string and installed only once parsing succeeded.
        UnicodeString scratchPat;
        RuleCharacterIterator iter(pattern, symbols, pos);
        applyPattern(iter, symbols, scratchPat, options, status);
        if (U_SUCCESS(status)) {
            if (iter.inVariable()) {
                // Syntax error: extra chars in variable value.
                status = U_MALFORMED_SET;
            } else {
                pat = scratchPat;
            }
        }
    }
    return *this;
}
/**
* Return true if the given position, in the given pattern, appears
* to be the start of a UnicodeSet pattern.
@ -1848,52 +1852,6 @@ int32_t UnicodeSet::serialize(uint16_t *dest, int32_t destCapacity, UErrorCode&
// Implementation: Pattern parsing
//----------------------------------------------------------------
/**
* Parses the given pattern, starting at the given position. The
* character at pattern.charAt(pos.getIndex()) must be '[', or the
* parse fails. Parsing continues until the corresponding closing
* ']'. If a syntax error is encountered between the opening and
* closing brace, the parse fails. Upon return from a successful
* parse, the ParsePosition is updated to point to the character
* following the closing ']', and a StringBuffer containing a
* pairs list for the parsed pattern is returned. This method calls
* itself recursively to parse embedded subpatterns.
*
* @param pattern the string containing the pattern to be parsed.
* The portion of the string from pos.getIndex(), which must be a
* '[', to the corresponding closing ']', is parsed.
* @param pos upon entry, the position at which to being parsing.
* The character at pattern.charAt(pos.getIndex()) must be a '['.
* Upon return from a U_SUCCESSful parse, pos.getIndex() is either
* the character after the closing ']' of the parsed pattern, or
* pattern.length() if the closing ']' is the last character of
* the pattern string.
* @return a StringBuffer containing a pairs list for the parsed
* substring of <code>pattern</code>
* @exception IllegalArgumentException if the parse fails.
*/
void UnicodeSet::applyPattern(const UnicodeString& pattern,
ParsePosition& pos,
uint32_t options,
const SymbolTable* symbols,
UErrorCode& status) {
if (U_FAILURE(status)) {
return;
}
// Need to build the pattern in a temporary string because
// _applyPattern calls add() etc., which set pat to empty.
UnicodeString rebuiltPat;
RuleCharacterIterator chars(pattern, symbols, pos);
applyPattern(chars, symbols, rebuiltPat, options, status);
if (U_FAILURE(status)) return;
if (chars.inVariable()) {
// syntaxError(chars, "Extra chars in variable value");
status = U_MALFORMED_SET;
return;
}
pat = rebuiltPat;
}
/**
* A small all-inline class to manage a UnicodeSet pointer. Add
* operator->() etc. as needed.
@ -1929,7 +1887,7 @@ public:
void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
const SymbolTable* symbols,
UnicodeString& rebuiltPat,
int32_t options,
uint32_t options,
UErrorCode& ec) {
if (U_FAILURE(ec)) return;

View file

@ -58,7 +58,7 @@ uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
UErrorCode* ec)
{
UnicodeString pat(patternLength==-1, pattern, patternLength);
UnicodeSet* set = new UnicodeSet(pat, options, *ec);
UnicodeSet* set = new UnicodeSet(pat, options, NULL, *ec);
/* test for NULL */
if(set == 0) {
*ec = U_MEMORY_ALLOCATION_ERROR;

View file

@ -29,7 +29,7 @@
#include "rbt_rule.h"
#include "strmatch.h"
#include "strrepl.h"
#include "symtable.h"
#include "unicode/symtable.h"
#include "tridpars.h"
#include "uvector.h"
#include "util.h"
@ -1417,7 +1417,7 @@ int32_t TransliteratorParser::syntaxError(UErrorCode parseErrorCode,
*/
UChar TransliteratorParser::parseSet(const UnicodeString& rule,
ParsePosition& pos) {
UnicodeSet* set = new UnicodeSet(rule, pos, *parseData, status);
UnicodeSet* set = new UnicodeSet(rule, pos, USET_IGNORE_SPACE, parseData, status);
set->compact();
return generateStandInFor(set);
}

View file

@ -3439,7 +3439,7 @@ UnicodeSet *RegexCompile::scanSet() {
}
uset = new UnicodeSet(fRXPat->fPattern, pos,
usetFlags, localStatus);
usetFlags, NULL, localStatus);
if (U_FAILURE(localStatus)) {
// TODO: Get more accurate position of the error from UnicodeSet's return info.
// UnicodeSet appears to not be reporting correctly at this time.
@ -3512,7 +3512,7 @@ UnicodeSet *RegexCompile::scanProp() {
}
// Build the UnicodeSet from the set pattern we just built up in a string.
uset = new UnicodeSet(setPattern, usetFlags, *fStatus);
uset = new UnicodeSet(setPattern, usetFlags, NULL, *fStatus);
if (U_FAILURE(*fStatus)) {
delete uset;
uset = NULL;

View file

@ -251,7 +251,7 @@ UnicodeSet* TransliteratorIDParser::parseGlobalFilter(const UnicodeString& id, i
if (UnicodeSet::resemblesPattern(id, pos)) {
ParsePosition ppos(pos);
UErrorCode ec = U_ZERO_ERROR;
filter = new UnicodeSet(id, ppos, USET_IGNORE_SPACE, ec);
filter = new UnicodeSet(id, ppos, USET_IGNORE_SPACE, NULL, ec);
/* test for NULL */
if (filter == 0) {
pos = start;
@ -705,7 +705,7 @@ TransliteratorIDParser::parseFilterID(const UnicodeString& id, int32_t& pos,
ParsePosition ppos(pos);
UErrorCode ec = U_ZERO_ERROR;
UnicodeSet set(id, ppos, USET_IGNORE_SPACE, ec);
UnicodeSet set(id, ppos, USET_IGNORE_SPACE, NULL, ec);
if (U_FAILURE(ec)) {
pos = start;
return NULL;

View file

@ -16,7 +16,7 @@
#include "unicode/usetiter.h"
#include "unicode/ustring.h"
#include "unicode/parsepos.h"
#include "symtable.h" // TODO move this to unicode/symtable.h
#include "unicode/symtable.h"
#include "hash.h"
UnicodeString operator+(const UnicodeString& left, const UnicodeSet& set) {
@ -953,13 +953,13 @@ void UnicodeSetTest::TestCloseOver() {
}
// Test the pattern API
s.applyPattern("[abc]", USET_CASE_INSENSITIVE, ec);
s.applyPattern("[abc]", USET_CASE_INSENSITIVE, NULL, ec);
if (U_FAILURE(ec)) {
errln("FAIL: applyPattern failed");
} else {
expectContainment(s, "abcABC", "defDEF");
}
UnicodeSet v("[^abc]", USET_CASE_INSENSITIVE, ec);
UnicodeSet v("[^abc]", USET_CASE_INSENSITIVE, NULL, ec);
if (U_FAILURE(ec)) {
errln("FAIL: constructor failed");
} else {
@ -1246,7 +1246,7 @@ void UnicodeSetTest::TestSymbolTable() {
i += 2;
ParsePosition pos(0);
UnicodeSet us(inpat, pos, sym, ec);
UnicodeSet us(inpat, pos, USET_IGNORE_SPACE, &sym, ec);
if (U_FAILURE(ec)) {
errln("FAIL: couldn't construct UnicodeSet");
continue;