From 3c9eea1d58475f3d9e39ce3219ec16fdd56bbcf7 Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Wed, 22 Oct 2003 00:43:37 +0000 Subject: [PATCH] ICU-2924 RBBI, fix rule roundtripping error with !! rule options. X-SVN-Rev: 13470 --- icu4c/source/common/rbbirpt.h | 172 +++++++++++++++--------------- icu4c/source/common/rbbirpt.txt | 8 +- icu4c/source/data/brkitr/line.txt | 4 +- 3 files changed, 94 insertions(+), 90 deletions(-) diff --git a/icu4c/source/common/rbbirpt.h b/icu4c/source/common/rbbirpt.h index 54d53f941d2..908d2b5ad9d 100644 --- a/icu4c/source/common/rbbirpt.h +++ b/icu4c/source/common/rbbirpt.h @@ -75,100 +75,101 @@ struct RBBIRuleTableEl { static const struct RBBIRuleTableEl gRuleParseStateTable[] = { {doNOP, 0, 0, 0, TRUE} - , {doExprStart, 254, 20, 8, FALSE} // 1 start + , {doExprStart, 254, 21, 8, FALSE} // 1 start , {doNOP, 129, 1,0, TRUE} // 2 - , {doExprStart, 36 /* $ */, 79, 89, FALSE} // 3 + , {doExprStart, 36 /* $ */, 80, 90, FALSE} // 3 , {doNOP, 33 /* ! */, 11,0, TRUE} // 4 , {doNOP, 59 /* ; */, 1,0, TRUE} // 5 , {doNOP, 252, 0,0, FALSE} // 6 - , {doExprStart, 255, 20, 8, FALSE} // 7 + , {doExprStart, 255, 21, 8, FALSE} // 7 , {doEndOfRule, 59 /* ; */, 1,0, TRUE} // 8 break-rule-end , {doNOP, 129, 8,0, TRUE} // 9 - , {doRuleError, 255, 94,0, FALSE} // 10 + , {doRuleError, 255, 95,0, FALSE} // 10 , {doNOP, 33 /* !
*/, 13,0, TRUE} // 11 rev-option - , {doReverseDir, 255, 19, 8, FALSE} // 12 + , {doReverseDir, 255, 20, 8, FALSE} // 12 , {doOptionStart, 131, 15,0, TRUE} // 13 option-scan1 - , {doRuleError, 255, 94,0, FALSE} // 14 + , {doRuleError, 255, 95,0, FALSE} // 14 , {doNOP, 132, 15,0, TRUE} // 15 option-scan2 - , {doOptionEnd, 129, 1,0, FALSE} // 16 - , {doOptionEnd, 59 /* ; */, 1,0, FALSE} // 17 - , {doRuleError, 255, 94,0, FALSE} // 18 - , {doExprStart, 255, 20, 8, FALSE} // 19 reverse-rule - , {doRuleChar, 254, 29,0, TRUE} // 20 term - , {doNOP, 129, 20,0, TRUE} // 21 - , {doRuleChar, 130, 29,0, TRUE} // 22 - , {doNOP, 91 /* [ */, 85, 29, FALSE} // 23 - , {doLParen, 40 /* ( */, 20, 29, TRUE} // 24 - , {doNOP, 36 /* $ */, 79, 28, FALSE} // 25 - , {doDotAny, 46 /* . */, 29,0, TRUE} // 26 - , {doRuleError, 255, 94,0, FALSE} // 27 - , {doCheckVarDef, 255, 29,0, FALSE} // 28 term-var-ref - , {doNOP, 129, 29,0, TRUE} // 29 expr-mod - , {doUnaryOpStar, 42 /* * */, 34,0, TRUE} // 30 - , {doUnaryOpPlus, 43 /* + */, 34,0, TRUE} // 31 - , {doUnaryOpQuestion, 63 /* ? */, 34,0, TRUE} // 32 - , {doNOP, 255, 34,0, FALSE} // 33 - , {doExprCatOperator, 254, 20,0, FALSE} // 34 expr-cont - , {doNOP, 129, 34,0, TRUE} // 35 - , {doExprCatOperator, 130, 20,0, FALSE} // 36 - , {doExprCatOperator, 91 /* [ */, 20,0, FALSE} // 37 - , {doExprCatOperator, 40 /* ( */, 20,0, FALSE} // 38 - , {doExprCatOperator, 36 /* $ */, 20,0, FALSE} // 39 - , {doExprCatOperator, 46 /* . 
*/, 20,0, FALSE} // 40 - , {doExprCatOperator, 47 /* / */, 46,0, FALSE} // 41 - , {doExprCatOperator, 123 /* { */, 58,0, TRUE} // 42 - , {doExprOrOperator, 124 /* | */, 20,0, TRUE} // 43 - , {doExprRParen, 41 /* ) */, 255,0, TRUE} // 44 - , {doExprFinished, 255, 255,0, FALSE} // 45 - , {doSlash, 47 /* / */, 48,0, TRUE} // 46 look-ahead - , {doNOP, 255, 94,0, FALSE} // 47 - , {doExprCatOperator, 254, 20,0, FALSE} // 48 expr-cont-no-slash - , {doNOP, 129, 34,0, TRUE} // 49 - , {doExprCatOperator, 130, 20,0, FALSE} // 50 - , {doExprCatOperator, 91 /* [ */, 20,0, FALSE} // 51 - , {doExprCatOperator, 40 /* ( */, 20,0, FALSE} // 52 - , {doExprCatOperator, 36 /* $ */, 20,0, FALSE} // 53 - , {doExprCatOperator, 46 /* . */, 20,0, FALSE} // 54 - , {doExprOrOperator, 124 /* | */, 20,0, TRUE} // 55 - , {doExprRParen, 41 /* ) */, 255,0, TRUE} // 56 - , {doExprFinished, 255, 255,0, FALSE} // 57 - , {doNOP, 129, 58,0, TRUE} // 58 tag-open - , {doStartTagValue, 128, 61,0, FALSE} // 59 - , {doTagExpectedError, 255, 94,0, FALSE} // 60 - , {doNOP, 129, 65,0, TRUE} // 61 tag-value - , {doNOP, 125 /* } */, 65,0, FALSE} // 62 - , {doTagDigit, 128, 61,0, TRUE} // 63 - , {doTagExpectedError, 255, 94,0, FALSE} // 64 - , {doNOP, 129, 65,0, TRUE} // 65 tag-close - , {doTagValue, 125 /* } */, 68,0, TRUE} // 66 - , {doTagExpectedError, 255, 94,0, FALSE} // 67 - , {doExprCatOperator, 254, 20,0, FALSE} // 68 expr-cont-no-tag - , {doNOP, 129, 68,0, TRUE} // 69 - , {doExprCatOperator, 130, 20,0, FALSE} // 70 - , {doExprCatOperator, 91 /* [ */, 20,0, FALSE} // 71 - , {doExprCatOperator, 40 /* ( */, 20,0, FALSE} // 72 - , {doExprCatOperator, 36 /* $ */, 20,0, FALSE} // 73 - , {doExprCatOperator, 46 /* . 
*/, 20,0, FALSE} // 74 - , {doExprCatOperator, 47 /* / */, 46,0, FALSE} // 75 - , {doExprOrOperator, 124 /* | */, 20,0, TRUE} // 76 - , {doExprRParen, 41 /* ) */, 255,0, TRUE} // 77 - , {doExprFinished, 255, 255,0, FALSE} // 78 - , {doStartVariableName, 36 /* $ */, 81,0, TRUE} // 79 scan-var-name - , {doNOP, 255, 94,0, FALSE} // 80 - , {doNOP, 131, 83,0, TRUE} // 81 scan-var-start - , {doVariableNameExpectedErr, 255, 94,0, FALSE} // 82 - , {doNOP, 132, 83,0, TRUE} // 83 scan-var-body - , {doEndVariableName, 255, 255,0, FALSE} // 84 - , {doScanUnicodeSet, 91 /* [ */, 255,0, TRUE} // 85 scan-unicode-set - , {doScanUnicodeSet, 112 /* p */, 255,0, TRUE} // 86 - , {doScanUnicodeSet, 80 /* P */, 255,0, TRUE} // 87 - , {doNOP, 255, 94,0, FALSE} // 88 - , {doNOP, 129, 89,0, TRUE} // 89 assign-or-rule - , {doStartAssign, 61 /* = */, 20, 92, TRUE} // 90 - , {doNOP, 255, 28, 8, FALSE} // 91 - , {doEndAssign, 59 /* ; */, 1,0, TRUE} // 92 assign-end - , {doRuleErrorAssignExpr, 255, 94,0, FALSE} // 93 - , {doExit, 255, 94,0, TRUE} // 94 errorDeath + , {doOptionEnd, 255, 17,0, FALSE} // 16 + , {doNOP, 59 /* ; */, 1,0, TRUE} // 17 option-scan3 + , {doNOP, 129, 17,0, TRUE} // 18 + , {doNOP, 255, 95,0, FALSE} // 19 + , {doExprStart, 255, 21, 8, FALSE} // 20 reverse-rule + , {doRuleChar, 254, 30,0, TRUE} // 21 term + , {doNOP, 129, 21,0, TRUE} // 22 + , {doRuleChar, 130, 30,0, TRUE} // 23 + , {doNOP, 91 /* [ */, 86, 30, FALSE} // 24 + , {doLParen, 40 /* ( */, 21, 30, TRUE} // 25 + , {doNOP, 36 /* $ */, 80, 29, FALSE} // 26 + , {doDotAny, 46 /* . */, 30,0, TRUE} // 27 + , {doRuleError, 255, 95,0, FALSE} // 28 + , {doCheckVarDef, 255, 30,0, FALSE} // 29 term-var-ref + , {doNOP, 129, 30,0, TRUE} // 30 expr-mod + , {doUnaryOpStar, 42 /* * */, 35,0, TRUE} // 31 + , {doUnaryOpPlus, 43 /* + */, 35,0, TRUE} // 32 + , {doUnaryOpQuestion, 63 /* ? 
*/, 35,0, TRUE} // 33 + , {doNOP, 255, 35,0, FALSE} // 34 + , {doExprCatOperator, 254, 21,0, FALSE} // 35 expr-cont + , {doNOP, 129, 35,0, TRUE} // 36 + , {doExprCatOperator, 130, 21,0, FALSE} // 37 + , {doExprCatOperator, 91 /* [ */, 21,0, FALSE} // 38 + , {doExprCatOperator, 40 /* ( */, 21,0, FALSE} // 39 + , {doExprCatOperator, 36 /* $ */, 21,0, FALSE} // 40 + , {doExprCatOperator, 46 /* . */, 21,0, FALSE} // 41 + , {doExprCatOperator, 47 /* / */, 47,0, FALSE} // 42 + , {doExprCatOperator, 123 /* { */, 59,0, TRUE} // 43 + , {doExprOrOperator, 124 /* | */, 21,0, TRUE} // 44 + , {doExprRParen, 41 /* ) */, 255,0, TRUE} // 45 + , {doExprFinished, 255, 255,0, FALSE} // 46 + , {doSlash, 47 /* / */, 49,0, TRUE} // 47 look-ahead + , {doNOP, 255, 95,0, FALSE} // 48 + , {doExprCatOperator, 254, 21,0, FALSE} // 49 expr-cont-no-slash + , {doNOP, 129, 35,0, TRUE} // 50 + , {doExprCatOperator, 130, 21,0, FALSE} // 51 + , {doExprCatOperator, 91 /* [ */, 21,0, FALSE} // 52 + , {doExprCatOperator, 40 /* ( */, 21,0, FALSE} // 53 + , {doExprCatOperator, 36 /* $ */, 21,0, FALSE} // 54 + , {doExprCatOperator, 46 /* . 
*/, 21,0, FALSE} // 55 + , {doExprOrOperator, 124 /* | */, 21,0, TRUE} // 56 + , {doExprRParen, 41 /* ) */, 255,0, TRUE} // 57 + , {doExprFinished, 255, 255,0, FALSE} // 58 + , {doNOP, 129, 59,0, TRUE} // 59 tag-open + , {doStartTagValue, 128, 62,0, FALSE} // 60 + , {doTagExpectedError, 255, 95,0, FALSE} // 61 + , {doNOP, 129, 66,0, TRUE} // 62 tag-value + , {doNOP, 125 /* } */, 66,0, FALSE} // 63 + , {doTagDigit, 128, 62,0, TRUE} // 64 + , {doTagExpectedError, 255, 95,0, FALSE} // 65 + , {doNOP, 129, 66,0, TRUE} // 66 tag-close + , {doTagValue, 125 /* } */, 69,0, TRUE} // 67 + , {doTagExpectedError, 255, 95,0, FALSE} // 68 + , {doExprCatOperator, 254, 21,0, FALSE} // 69 expr-cont-no-tag + , {doNOP, 129, 69,0, TRUE} // 70 + , {doExprCatOperator, 130, 21,0, FALSE} // 71 + , {doExprCatOperator, 91 /* [ */, 21,0, FALSE} // 72 + , {doExprCatOperator, 40 /* ( */, 21,0, FALSE} // 73 + , {doExprCatOperator, 36 /* $ */, 21,0, FALSE} // 74 + , {doExprCatOperator, 46 /* . */, 21,0, FALSE} // 75 + , {doExprCatOperator, 47 /* / */, 47,0, FALSE} // 76 + , {doExprOrOperator, 124 /* | */, 21,0, TRUE} // 77 + , {doExprRParen, 41 /* ) */, 255,0, TRUE} // 78 + , {doExprFinished, 255, 255,0, FALSE} // 79 + , {doStartVariableName, 36 /* $ */, 82,0, TRUE} // 80 scan-var-name + , {doNOP, 255, 95,0, FALSE} // 81 + , {doNOP, 131, 84,0, TRUE} // 82 scan-var-start + , {doVariableNameExpectedErr, 255, 95,0, FALSE} // 83 + , {doNOP, 132, 84,0, TRUE} // 84 scan-var-body + , {doEndVariableName, 255, 255,0, FALSE} // 85 + , {doScanUnicodeSet, 91 /* [ */, 255,0, TRUE} // 86 scan-unicode-set + , {doScanUnicodeSet, 112 /* p */, 255,0, TRUE} // 87 + , {doScanUnicodeSet, 80 /* P */, 255,0, TRUE} // 88 + , {doNOP, 255, 95,0, FALSE} // 89 + , {doNOP, 129, 90,0, TRUE} // 90 assign-or-rule + , {doStartAssign, 61 /* = */, 21, 93, TRUE} // 91 + , {doNOP, 255, 29, 8, FALSE} // 92 + , {doEndAssign, 59 /* ; */, 1,0, TRUE} // 93 assign-end + , {doRuleErrorAssignExpr, 255, 95,0, FALSE} // 94 + , {doExit, 255, 
95,0, TRUE} // 95 errorDeath }; static const char * const RBBIRuleStateNames[] = { 0, "start", @@ -187,6 +188,7 @@ static const char * const RBBIRuleStateNames[] = { 0, 0, "option-scan2", 0, + "option-scan3", 0, 0, "reverse-rule", diff --git a/icu4c/source/common/rbbirpt.txt b/icu4c/source/common/rbbirpt.txt index 9c6a45c1e56..00f6f94a515 100644 --- a/icu4c/source/common/rbbirpt.txt +++ b/icu4c/source/common/rbbirpt.txt @@ -86,10 +86,12 @@ option-scan1: option-scan2: name_char n option-scan2 - white_space start doOptionEnd - ';' start doOptionEnd - default errorDeath doRuleError + default option-scan3 doOptionEnd +option-scan3: + ';' n start + white_space n option-scan3 + default errorDeath reverse-rule: diff --git a/icu4c/source/data/brkitr/line.txt b/icu4c/source/data/brkitr/line.txt index b590e1d1a8a..317f773fa33 100644 --- a/icu4c/source/data/brkitr/line.txt +++ b/icu4c/source/data/brkitr/line.txt @@ -12,8 +12,8 @@ # Character Classes defined by TR 14. # -!!chain -!!LBCMNoChain +!!chain ; +!!LBCMNoChain; $AI = [:LineBreak = Ambiguous:]; $AL = [:LineBreak = Alphabetic:];