mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-2292 sentence break rules updated
X-SVN-Rev: 13649
This commit is contained in:
parent
41ac2f557b
commit
cea200bf0a
2 changed files with 61 additions and 27 deletions
|
@ -78,31 +78,75 @@ $EndSequence = $InteriorChars* $SepSeq?;
|
|||
|
||||
!!reverse;
|
||||
|
||||
#
|
||||
# Reverse Rules
|
||||
#
|
||||
$EndGorp = ($Term | $ATerm | $Sep | $Close | $Extend | $Format | $Sp);
|
||||
$RevEndSequence = $EndGorp* $InteriorChars* $EndGorp* | $Sep [^$ATerm $Term]*;
|
||||
$ReverseLowerWordFollows = $Lower [^$OLetter $Upper $Lower $Sep]* $ATerm $InteriorChars*;
|
||||
$ReverseUpperSurround = $Upper $Format* $Extend* $ATerm $Format* $Extend* $Upper $InteriorChars*;
|
||||
$ReverseNumberFollows = $Numeric $Format* $Extend* $ATerm $InteriorChars*;
|
||||
# rule 6
|
||||
|
||||
$RevEndSequence ($ReverseLowerWordFollows | $ReverseUpperSurround | $ReverseNumberFollows)* .?;
|
||||
$RULE6 = $Numeric $Format* $Extend* $ATerm;
|
||||
|
||||
# rule 7
|
||||
|
||||
$RULE7 = $Upper $Format* $Extend* $ATerm $Format* $Extend* $Upper;
|
||||
|
||||
# rule 8
|
||||
|
||||
$RULE8 = $Lower ($Format* $Extend* [^$OLetter $Upper $Lower $Sep])*
|
||||
($Format* $Extend* $Sp)* ($Format* $Extend* $Close)*
|
||||
$Format* $Extend* $ATerm;
|
||||
|
||||
# rule 9, 10, 11
|
||||
|
||||
# $CR $LF
|
||||
$End = $Sep | \u000a\u000d
|
||||
| $Format* $Extend* $Sp* $Format* $Extend* $Close* $Format*
|
||||
$Extend* ($Term | $ATerm)
|
||||
| $Sep $Format* $Extend* $Sp* $Format* $Extend* $Close* $Format*
|
||||
$Extend* ($Term | $ATerm);
|
||||
|
||||
# rule 12
|
||||
|
||||
$RULE12 = [^$Sep $Term $ATerm];
|
||||
|
||||
$Join = ($RULE6 | $RULE7 | $RULE8 | $RULE12)*;
|
||||
|
||||
$End;
|
||||
|
||||
$End? $Join [$RULE12 - $Sp - $Close];
|
||||
|
||||
# forces a break at the beginning of text "$Sp blah blah blah"
|
||||
# remember that the break iterator takes the longest match
|
||||
$End? $Join $Sp / [^$Term $ATerm $Sp $Close];
|
||||
|
||||
# forces a break at the beginning of text "$Close blah blah blah"
|
||||
$End? $Join $Close / [^$Term $ATerm $Close];
|
||||
|
||||
## -------------------------------------------------
|
||||
|
||||
## !!safe_reverse;
|
||||
!!safe_reverse;
|
||||
|
||||
# rule 4
|
||||
$Extend+ [^$Extend];
|
||||
|
||||
# rule 7
|
||||
## $Extend* $ATerm $Format* $Extend* $Upper;
|
||||
$Extend* $ATerm $Format* $Extend* $Upper;
|
||||
|
||||
# rule 8
|
||||
($Extend* $Term)+ ($Extend* $Sp $Format*)* ($Extend* $Close $Format*)* $Extend* $ATerm;
|
||||
|
||||
# rule 11
|
||||
## ($Extend* $Sp $Format*)* ($Extend* $Close $Format*)* $Extend* ($Term | $ATerm);
|
||||
($Extend* $Sp $Format*)* ($Extend* $Close $Format*)*;
|
||||
($Extend* $Sp $Format*)* ($Extend* $Close $Format*)* $Extend* ($Term | $ATerm);
|
||||
|
||||
## -------------------------------------------------
|
||||
|
||||
!!safe_forward;
|
||||
|
||||
# rule 7
|
||||
|
||||
$ATerm $Extend* $Format* $Upper;
|
||||
|
||||
# rule 8
|
||||
|
||||
## $Lower .;
|
||||
$Lower .;
|
||||
|
||||
# rule 11
|
||||
|
||||
($Close $Extend* $Format*)* ($Sp $Extend* $Format*)*;
|
|
@ -3273,7 +3273,9 @@ void RBBITest::TestSentBreaks(void)
|
|||
BreakIterator *bi = BreakIterator::createSentenceInstance(locale, status);
|
||||
UChar str[100];
|
||||
char *strlist[] =
|
||||
{"This\n",
|
||||
{
|
||||
"Now\ris\nthe\r\ntime\n\rfor\r\r",
|
||||
"This\n",
|
||||
"Hello! how are you? I'am fine. Thankyou. How are you doing? This\n costs $20,00,000.",
|
||||
"\"Sentence ending with a quote.\" Bye.",
|
||||
" (This is it). Testing the sentence iterator. \"This isn't it.\"",
|
||||
|
@ -3295,19 +3297,7 @@ void RBBITest::TestSentBreaks(void)
|
|||
for (i = bi->first(); i != BreakIterator::DONE; i = bi->next()) {
|
||||
forward[count ++] = i;
|
||||
}
|
||||
int tempcount = count;
|
||||
for (i = bi->last(); i != BreakIterator::DONE; i = bi->previous()) {
|
||||
tempcount --;
|
||||
if (forward[tempcount] != i) {
|
||||
printStringBreaks(ustr, forward, count);
|
||||
errln("happy break test reverse failed: expected %d but got %d",
|
||||
forward[tempcount], i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (tempcount != 0) {
|
||||
errln("happy break test failed: missed a match");
|
||||
}
|
||||
testBreakBoundPreceding(this, ustr, bi, forward, count);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue