ICU-2093 RBBI rule make dependencies for UnicodeSet properties adjusted.

Check for empty UnicodeSets added to builder.

X-SVN-Rev: 11476
This commit is contained in:
Andy Heninger 2003-04-09 00:09:14 +00:00
parent 48eda8bd06
commit 71070da39f
6 changed files with 33 additions and 14 deletions

View file

@ -1745,7 +1745,8 @@ _uBrkErrorName[U_BRK_ERROR_LIMIT - U_BRK_ERROR_START] = {
"U_BRK_MISMATCHED_PAREN",
"U_BRK_NEW_LINE_IN_QUOTED_STRING",
"U_BRK_UNDEFINED_VARIABLE",
"U_BRK_INIT_ERROR"
"U_BRK_INIT_ERROR",
"U_BRK_RULE_EMPTY_SET"
};
static const char * const

View file

@ -1085,10 +1085,24 @@ void RBBIRuleScanner::scanSet() {
// TODO: Get more accurate position of the error from UnicodeSet's return info.
// UnicodeSet appears to not be reporting correctly at this time.
RBBIDebugPrintf("UnicodeSet parse postion.ErrorIndex = %d\n", pos.getIndex());
error(localStatus);
return;
error(localStatus);
delete uset;
return;
}
// Verify that the set contains at least one code point.
//
if (uset->charAt(0) == -1) {
// This set is empty.
// Make it an error, because it almost certainly is not what the user wanted.
// Also, avoids having to think about corner cases in the tree manipulation code
// that occurs later on.
error(U_BRK_RULE_EMPTY_SET);
delete uset;
return;
}
// Advance the RBBI parse position over the UnicodeSet pattern.
// Don't just set fScanIndex because the line/char positions maintained
// for error reporting would be thrown off.
@ -1118,7 +1132,6 @@ void RBBIRuleScanner::scanSet() {
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

View file

@ -612,6 +612,7 @@ typedef enum UErrorCode {
U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */
U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */
U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */
U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */
U_BRK_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for Break Iterator failures */
/*

View file

@ -29,10 +29,10 @@ $T = [:Hangul_Syllable_Type = T:];
$LV = [:Hangul_Syllable_Type = LV:];
$LVT = [:Hangul_Syllable_Type = LVT:];
$HungulSyllable = $L+ | ($L* ($LV? $V+ | $LV | $LVT) $T*) | $T+;
$HangulSyllable = $L+ | ($L* ($LV? $V+ | $LV | $LVT) $T*) | $T+;
$CR $LF;
([^$Control] | $HungulSyllable) $Extend*;
([^$Control] | $HangulSyllable) $Extend*;
.;

View file

@ -42,6 +42,8 @@ $Numeric = [:LineBreak = Numeric:];
# Character Class Definitions.
# The names are those from TR29.
#
$CR = \u000d;
$LF = \u000a;
$Control = [[:Zl:] [:Zp:] [:Cc:] [:Cf:]];
$Extend = [[:Grapheme_Extend = TRUE:]];
@ -110,7 +112,7 @@ $KatakanaEx+ {300};
# Controls do not.
#
[^$Control [:Ideographic:]] $Extend*;
[\u000d][\u000a];
$CR $LF;
#
# Reverse Rules. Back up over any of the chars that can group together.

View file

@ -274,25 +274,27 @@ $(BRK_FILES:.brk" =.brk"
# TODO: set up an inference rule, so these don't need to be written out one by one...
#
"$(ICUBLD)\$(ICUDT)char.brk" : "$(ICUBRK)\char.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
BRKDEPS = "$(ICUBLD)\$(ICUDT)uprops.icu" "$(ICUBLD)\$(ICUDT)unames.icu" "$(ICUBLD)\$(ICUDT)pnames.icu" "$(ICUBLD)\$(ICUDT)unorm.icu"
"$(ICUBLD)\$(ICUDT)char.brk" : "$(ICUBRK)\char.txt" $(BRKDEPS)
genbrk -r "$(ICUBRK)\char.txt" -o "$(ICUBLD)\$(ICUDT)char.brk" -i "$(ICUBLD)\\"
"$(ICUBLD)\$(ICUDT)word.brk" : "$(ICUBRK)\word.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
"$(ICUBLD)\$(ICUDT)word.brk" : "$(ICUBRK)\word.txt" $(BRKDEPS)
genbrk -r "$(ICUBRK)\word.txt" -o "$(ICUBLD)\$(ICUDT)word.brk" -i "$(ICUBLD)\\"
"$(ICUBLD)\$(ICUDT)line.brk" : "$(ICUBRK)\line.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
"$(ICUBLD)\$(ICUDT)line.brk" : "$(ICUBRK)\line.txt" $(BRKDEPS)
genbrk -r "$(ICUBRK)\line.txt" -o "$(ICUBLD)\$(ICUDT)line.brk" -i "$(ICUBLD)\\"
"$(ICUBLD)\$(ICUDT)sent.brk" : "$(ICUBRK)\sent.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
"$(ICUBLD)\$(ICUDT)sent.brk" : "$(ICUBRK)\sent.txt" $(BRKDEPS)
genbrk -r "$(ICUBRK)\sent.txt" -o "$(ICUBLD)\$(ICUDT)sent.brk" -i "$(ICUBLD)\\"
"$(ICUBLD)\$(ICUDT)title.brk" : "$(ICUBRK)\title.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
"$(ICUBLD)\$(ICUDT)title.brk" : "$(ICUBRK)\title.txt" $(BRKDEPS)
genbrk -r "$(ICUBRK)\title.txt" -o "$(ICUBLD)\$(ICUDT)title.brk" -i "$(ICUBLD)\\"
"$(ICUBLD)\$(ICUDT)word_th.brk" : "$(ICUBRK)\word_th.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
"$(ICUBLD)\$(ICUDT)word_th.brk" : "$(ICUBRK)\word_th.txt" $(BRKDEPS)
genbrk -r "$(ICUBRK)\word_th.txt" -o "$(ICUBLD)\$(ICUDT)word_th.brk" -i "$(ICUBLD)\\"
"$(ICUBLD)\$(ICUDT)line_th.brk" : "$(ICUBRK)\line_th.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
"$(ICUBLD)\$(ICUDT)line_th.brk" : "$(ICUBRK)\line_th.txt" $(BRKDEPS)
genbrk -r "$(ICUBRK)\line_th.txt" -o "$(ICUBLD)\$(ICUDT)line_th.brk" -i "$(ICUBLD)\\"