ICU-45 RBBI rule builder, fixed bug in handling of 'quoted' literals.

X-SVN-Rev: 9108
This commit is contained in:
Andy Heninger 2002-07-12 01:30:23 +00:00
parent f414b9c5d2
commit 566fa58fff
3 changed files with 57 additions and 11 deletions

View file

@ -700,6 +700,21 @@ void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, Unicode
//
// Assorted Unicode character constants.
// Numeric because there is no portable way to enter them as literals.
// (Think EBCDIC).
//
static const UChar chCR = 0x0d; // New lines, for terminating comments.
static const UChar chLF = 0x0a; // Line feed (LF).
static const UChar chNEL = 0x85; // NEL newline variant
static const UChar chLS = 0x2028; // Unicode Line Separator
static const UChar chApos = 0x27; // single quote, for quoted chars.
static const UChar chPound = 0x23; // '#', introduces a comment.
static const UChar chBackSlash = 0x5c; // '\' introduces a char escape
static const UChar chLParen = 0x28; // '(', returned by nextChar() when a single quote turns quote mode on.
static const UChar chRParen = 0x29; // ')', returned by nextChar() when a single quote turns quote mode off.
//----------------------------------------------------------------------------------------
//
@ -708,11 +723,6 @@ void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, Unicode
// keep track of input position for error reporting.
//
//----------------------------------------------------------------------------------------
static const UChar chCR = 0x0d; // New lines, for terminating comments.
static const UChar chLF = 0x0a; // Line feed (LF).
static const UChar chNEL = 0x85; // NEL newline variant
static const UChar chLS = 0x2028; // Unicode Line Separator
static const UChar chApos = 0x27; // single quote, for quoted chars.
UChar32 RBBIRuleScanner::nextCharLL() {
UChar32 ch;
@ -758,10 +768,6 @@ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) {
// Unicode Character constants needed for the processing done by nextChar(),
// in hex because literals won't work on EBCDIC machines.
static const UChar chPound = 0x23; // '#', introduces a comment.
static const UChar chBackSlash = 0x5c; // '\' introduces a char escape
static const UChar ch_U = 0x55; // Escapes with special meaning.
static const UChar ch_u = 0x75;
fScanIndex = fNextIndex;
c.fChar = nextCharLL();
@ -779,9 +785,15 @@ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) {
else
{
// Single quote, by itself.
// Toggle quoting mode, then recursively call ourselves to get a char to return.
// Toggle quoting mode.
// Return either '(' or ')', because quotes cause a grouping of the quoted text.
fQuoteMode = !fQuoteMode;
nextChar(c);
if (fQuoteMode == TRUE) {
c.fChar = chLParen;
} else {
c.fChar = chRParen;
}
c.fEscaped = FALSE; // The paren that we return is not escaped.
return;
}
}

View file

@ -614,6 +614,33 @@ void RBBIAPITest::TestBuilder() {
}
//
// TestQuoteGrouping
// Single quotes within rules imply a grouping, so that a modifier
// following the quoted text (* or +) applies to all of the quoted chars.
//
void RBBIAPITest::TestQuoteGrouping() {
UnicodeString rulesString1 = "#Here comes the rule...\n"
"'$@!'*;\n"
".;\n";
UnicodeString testString1 = "$@!X$@!XX";
// 01234567890
int32_t bounds1[] = {0, 3, 4, 7, 8, 9};
UErrorCode status=U_ZERO_ERROR;
UParseError parseError;
RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
if(U_FAILURE(status)) {
errln("FAIL : in construction");
} else {
bi->setText(testString1);
doBoundaryTest(*bi, testString1, bounds1);
}
delete bi;
}
//---------------------------------------------
// runIndexedTest
//---------------------------------------------
@ -631,6 +658,7 @@ void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name,
case 5: name = "TestLastPreviousPreceding"; if (exec) TestLastPreviousPreceding(); break;
case 6: name = "TestIsBoundary"; if (exec) TestIsBoundary(); break;
case 7: name = "TestBuilder"; if (exec) TestBuilder(); break;
case 8: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
default: name = ""; break; /*needed to end loop*/
}

View file

@ -63,6 +63,12 @@ public:
**/
void TestBuilder(void);
/**
* Tests grouping effect of 'single quotes' in rules: a modifier (* or +)
* that follows quoted text applies to all of the quoted chars.
**/
void TestQuoteGrouping();
/**
*Internal subroutines
**/