mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-21 04:29:31 +00:00
ICU-2093 RBBI rule make dependencies for UnicodeSet properties adjusted.
Check for empty UnicodeSets added to builder. X-SVN-Rev: 11476
This commit is contained in:
parent
48eda8bd06
commit
71070da39f
6 changed files with 33 additions and 14 deletions
|
@ -1745,7 +1745,8 @@ _uBrkErrorName[U_BRK_ERROR_LIMIT - U_BRK_ERROR_START] = {
|
|||
"U_BRK_MISMATCHED_PAREN",
|
||||
"U_BRK_NEW_LINE_IN_QUOTED_STRING",
|
||||
"U_BRK_UNDEFINED_VARIABLE",
|
||||
"U_BRK_INIT_ERROR"
|
||||
"U_BRK_INIT_ERROR",
|
||||
"U_BRK_RULE_EMPTY_SET"
|
||||
};
|
||||
|
||||
static const char * const
|
||||
|
|
|
@ -1085,10 +1085,24 @@ void RBBIRuleScanner::scanSet() {
|
|||
// TODO: Get more accurate position of the error from UnicodeSet's return info.
|
||||
// UnicodeSet appears to not be reporting correctly at this time.
|
||||
RBBIDebugPrintf("UnicodeSet parse postion.ErrorIndex = %d\n", pos.getIndex());
|
||||
error(localStatus);
|
||||
return;
|
||||
error(localStatus);
|
||||
delete uset;
|
||||
return;
|
||||
}
|
||||
|
||||
// Verify that the set contains at least one code point.
|
||||
//
|
||||
if (uset->charAt(0) == -1) {
|
||||
// This set is empty.
|
||||
// Make it an error, because it almost certainly is not what the user wanted.
|
||||
// Also, avoids having to think about corner cases in the tree manipulation code
|
||||
// that occurs later on.
|
||||
error(U_BRK_RULE_EMPTY_SET);
|
||||
delete uset;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Advance the RBBI parse position over the UnicodeSet pattern.
|
||||
// Don't just set fScanIndex because the line/char positions maintained
|
||||
// for error reporting would be thrown off.
|
||||
|
@ -1118,7 +1132,6 @@ void RBBIRuleScanner::scanSet() {
|
|||
|
||||
}
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
|
|
@ -612,6 +612,7 @@ typedef enum UErrorCode {
|
|||
U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */
|
||||
U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */
|
||||
U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */
|
||||
U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */
|
||||
U_BRK_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for Break Iterator failures */
|
||||
|
||||
/*
|
||||
|
|
|
@ -29,10 +29,10 @@ $T = [:Hangul_Syllable_Type = T:];
|
|||
$LV = [:Hangul_Syllable_Type = LV:];
|
||||
$LVT = [:Hangul_Syllable_Type = LVT:];
|
||||
|
||||
$HungulSyllable = $L+ | ($L* ($LV? $V+ | $LV | $LVT) $T*) | $T+;
|
||||
$HangulSyllable = $L+ | ($L* ($LV? $V+ | $LV | $LVT) $T*) | $T+;
|
||||
|
||||
$CR $LF;
|
||||
([^$Control] | $HungulSyllable) $Extend*;
|
||||
([^$Control] | $HangulSyllable) $Extend*;
|
||||
.;
|
||||
|
||||
|
||||
|
|
|
@ -42,6 +42,8 @@ $Numeric = [:LineBreak = Numeric:];
|
|||
# Character Class Definitions.
|
||||
# The names are those from TR29.
|
||||
#
|
||||
$CR = \u000d;
|
||||
$LF = \u000a;
|
||||
$Control = [[:Zl:] [:Zp:] [:Cc:] [:Cf:]];
|
||||
$Extend = [[:Grapheme_Extend = TRUE:]];
|
||||
|
||||
|
@ -110,7 +112,7 @@ $KatakanaEx+ {300};
|
|||
# Controls are do not.
|
||||
#
|
||||
[^$Control [:Ideographic:]] $Extend*;
|
||||
[\u000d][\u000a];
|
||||
$CR $LF;
|
||||
|
||||
#
|
||||
# Reverse Rules. Back up over any of the chars that can group together.
|
||||
|
|
|
@ -274,25 +274,27 @@ $(BRK_FILES:.brk" =.brk"
|
|||
# TODO: set up an inference rule, so these don't need to be written out one by one...
|
||||
#
|
||||
|
||||
"$(ICUBLD)\$(ICUDT)char.brk" : "$(ICUBRK)\char.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
|
||||
BRKDEPS = "$(ICUBLD)\$(ICUDT)uprops.icu" "$(ICUBLD)\$(ICUDT)unames.icu" "$(ICUBLD)\$(ICUDT)pnames.icu" "$(ICUBLD)\$(ICUDT)unorm.icu"
|
||||
|
||||
"$(ICUBLD)\$(ICUDT)char.brk" : "$(ICUBRK)\char.txt" $(BRKDEPS)
|
||||
genbrk -r "$(ICUBRK)\char.txt" -o "$(ICUBLD)\$(ICUDT)char.brk" -i "$(ICUBLD)\\"
|
||||
|
||||
"$(ICUBLD)\$(ICUDT)word.brk" : "$(ICUBRK)\word.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
|
||||
"$(ICUBLD)\$(ICUDT)word.brk" : "$(ICUBRK)\word.txt" $(BRKDEPS)
|
||||
genbrk -r "$(ICUBRK)\word.txt" -o "$(ICUBLD)\$(ICUDT)word.brk" -i "$(ICUBLD)\\"
|
||||
|
||||
"$(ICUBLD)\$(ICUDT)line.brk" : "$(ICUBRK)\line.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
|
||||
"$(ICUBLD)\$(ICUDT)line.brk" : "$(ICUBRK)\line.txt" $(BRKDEPS)
|
||||
genbrk -r "$(ICUBRK)\line.txt" -o "$(ICUBLD)\$(ICUDT)line.brk" -i "$(ICUBLD)\\"
|
||||
|
||||
"$(ICUBLD)\$(ICUDT)sent.brk" : "$(ICUBRK)\sent.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
|
||||
"$(ICUBLD)\$(ICUDT)sent.brk" : "$(ICUBRK)\sent.txt" $(BRKDEPS)
|
||||
genbrk -r "$(ICUBRK)\sent.txt" -o "$(ICUBLD)\$(ICUDT)sent.brk" -i "$(ICUBLD)\\"
|
||||
|
||||
"$(ICUBLD)\$(ICUDT)title.brk" : "$(ICUBRK)\title.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
|
||||
"$(ICUBLD)\$(ICUDT)title.brk" : "$(ICUBRK)\title.txt" $(BRKDEPS)
|
||||
genbrk -r "$(ICUBRK)\title.txt" -o "$(ICUBLD)\$(ICUDT)title.brk" -i "$(ICUBLD)\\"
|
||||
|
||||
"$(ICUBLD)\$(ICUDT)word_th.brk" : "$(ICUBRK)\word_th.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
|
||||
"$(ICUBLD)\$(ICUDT)word_th.brk" : "$(ICUBRK)\word_th.txt" $(BRKDEPS)
|
||||
genbrk -r "$(ICUBRK)\word_th.txt" -o "$(ICUBLD)\$(ICUDT)word_th.brk" -i "$(ICUBLD)\\"
|
||||
|
||||
"$(ICUBLD)\$(ICUDT)line_th.brk" : "$(ICUBRK)\line_th.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
|
||||
"$(ICUBLD)\$(ICUDT)line_th.brk" : "$(ICUBRK)\line_th.txt" $(BRKDEPS)
|
||||
genbrk -r "$(ICUBRK)\line_th.txt" -o "$(ICUBLD)\$(ICUDT)line_th.brk" -i "$(ICUBLD)\\"
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue