mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-45 RBBI rule builder, fixed bug in handling of 'quoted' literals.
X-SVN-Rev: 9108
This commit is contained in:
parent
f414b9c5d2
commit
566fa58fff
3 changed files with 57 additions and 11 deletions
|
@ -700,6 +700,21 @@ void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, Unicode
|
|||
|
||||
|
||||
|
||||
//
|
||||
// Assorted Unicode character constants.
|
||||
// Numeric because there is no portable way to enter them as literals.
|
||||
// (Think EBCDIC).
|
||||
//
|
||||
static const UChar chCR = 0x0d; // Carriage return. New lines, for terminating comments.
|
||||
static const UChar chLF = 0x0a; // Line feed, another newline variant.
|
||||
static const UChar chNEL = 0x85; // NEL newline variant
|
||||
static const UChar chLS = 0x2028; // Unicode Line Separator
|
||||
static const UChar chApos = 0x27; // single quote, for quoted chars.
|
||||
static const UChar chPound = 0x23; // '#', introduces a comment.
|
||||
static const UChar chBackSlash = 0x5c; // '\' introduces a char escape
|
||||
static const UChar chLParen = 0x28; // '(' - nextChar() returns this when a lone quote turns quote mode on.
|
||||
static const UChar chRParen = 0x29; // ')' - nextChar() returns this when a lone quote turns quote mode off.
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
//
|
||||
|
@ -708,11 +723,6 @@ void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, Unicode
|
|||
// keep track of input position for error reporting.
|
||||
//
|
||||
//----------------------------------------------------------------------------------------
|
||||
static const UChar chCR = 0x0d; // New lines, for terminating comments.
|
||||
static const UChar chLF = 0x0a;
|
||||
static const UChar chNEL = 0x85; // NEL newline variant
|
||||
static const UChar chLS = 0x2028; // Unicode Line Separator
|
||||
static const UChar chApos = 0x27; // single quote, for quoted chars.
|
||||
UChar32 RBBIRuleScanner::nextCharLL() {
|
||||
UChar32 ch;
|
||||
|
||||
|
@ -758,10 +768,6 @@ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) {
|
|||
|
||||
// Unicode Character constants needed for the processing done by nextChar(),
|
||||
// in hex because literals won't work on EBCDIC machines.
|
||||
static const UChar chPound = 0x23; // '#', introduces a comment.
|
||||
static const UChar chBackSlash = 0x5c; // '\' introduces a char escape
|
||||
static const UChar ch_U = 0x55; // Escapes with special meaning.
|
||||
static const UChar ch_u = 0x75;
|
||||
|
||||
fScanIndex = fNextIndex;
|
||||
c.fChar = nextCharLL();
|
||||
|
@ -779,9 +785,15 @@ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) {
|
|||
else
|
||||
{
|
||||
// Single quote, by itself.
|
||||
// Toggle quoting mode, then recursively call ourselves to get a char to return.
|
||||
// Toggle quoting mode.
|
||||
// Return either '(' or ')', because quotes cause a grouping of the quoted text.
|
||||
fQuoteMode = !fQuoteMode;
|
||||
nextChar(c);
|
||||
if (fQuoteMode == TRUE) {
|
||||
c.fChar = chLParen;
|
||||
} else {
|
||||
c.fChar = chRParen;
|
||||
}
|
||||
c.fEscaped = FALSE; // The paren that we return is not escaped.
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -614,6 +614,33 @@ void RBBIAPITest::TestBuilder() {
|
|||
}
|
||||
|
||||
|
||||
//
|
||||
// TestQuoteGrouping
|
||||
// Single quotes within rules imply a grouping, so that a modifier
|
||||
// following the quoted text (* or +) applies to all of the quoted chars.
|
||||
//
|
||||
// Builds a RuleBasedBreakIterator from a rule set in which a quoted literal
// is followed by '*', then hands the iterator, the test text and a table of
// offsets to doBoundaryTest(). Reports a failure through errln() if the
// iterator cannot be constructed from the rules.
void RBBIAPITest::TestQuoteGrouping() {
|
||||
// Rule source: a '#' comment line, then the quoted literal '$@!' with a
// trailing '*' (the construct under test), then a "." rule
// (presumably matches any single character - confirm against the rule syntax).
UnicodeString rulesString1 = "#Here comes the rule...\n"
|
||||
"'$@!'*;\n"
|
||||
".;\n";
|
||||
|
||||
// Text to iterate over: two runs of the quoted literal separated by 'X' characters.
UnicodeString testString1 = "$@!X$@!XX";
|
||||
// 01234567890 (index ruler for testString1; column alignment may have been lost)
|
||||
// Offsets passed to doBoundaryTest(); presumably the expected break positions,
// i.e. after each whole "$@!" run and after each 'X' - confirm against doBoundaryTest().
int32_t bounds1[] = {0, 3, 4, 7, 8, 9};
|
||||
UErrorCode status=U_ZERO_ERROR;
|
||||
UParseError parseError;
|
||||
|
||||
RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
|
||||
// Only run the boundary check when the rules were accepted.
if(U_FAILURE(status)) {
|
||||
errln("FAIL : in construction");
|
||||
} else {
|
||||
bi->setText(testString1);
|
||||
doBoundaryTest(*bi, testString1, bounds1);
|
||||
}
|
||||
// Reached on both the failure and the success path, so the iterator is always released.
delete bi;
|
||||
}
|
||||
|
||||
|
||||
//---------------------------------------------
|
||||
// runIndexedTest
|
||||
//---------------------------------------------
|
||||
|
@ -631,6 +658,7 @@ void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name,
|
|||
case 5: name = "TestLastPreviousPreceding"; if (exec) TestLastPreviousPreceding(); break;
|
||||
case 6: name = "TestIsBoundary"; if (exec) TestIsBoundary(); break;
|
||||
case 7: name = "TestBuilder"; if (exec) TestBuilder(); break;
|
||||
case 8: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
|
||||
|
||||
default: name = ""; break; /*needed to end loop*/
|
||||
}
|
||||
|
|
|
@ -63,6 +63,12 @@ public:
|
|||
**/
|
||||
void TestBuilder(void);
|
||||
|
||||
/**
|
||||
* Tests grouping effect of 'single quotes' in rules.
|
||||
**/
|
||||
void TestQuoteGrouping();
|
||||
|
||||
|
||||
/**
|
||||
*Internal subroutines
|
||||
**/
|
||||
|
|
Loading…
Add table
Reference in a new issue