ICU-13569 rbbi table compression, work in progress.

X-SVN-Rev: 40873
This commit is contained in:
Andy Heninger 2018-02-08 21:17:18 +00:00
parent 3d4a3fbaa8
commit 4959b9b3a3
4 changed files with 36 additions and 17 deletions

View file

@ -357,11 +357,17 @@ void RBBIRuleBuilder::optimizeTables() {
int32_t leftClass;
int32_t rightClass;
leftClass = 1;
rightClass = 2;
leftClass = 3;
rightClass = 4;
printf("Optimizing tables ...\n");
while (fForwardTables->findDuplCharClassFrom(leftClass, rightClass)) {
printf("Merging duplicate columns (%d, %d)\n", leftClass, rightClass);
fSetBuilder->mergeCategories(leftClass, rightClass);
fForwardTables->removeColumn(rightClass);
fReverseTables->removeColumn(rightClass);
fSafeFwdTables->removeColumn(rightClass);
fSafeRevTables->removeColumn(rightClass);
}

View file

@ -1090,7 +1090,7 @@ bool RBBITableBuilder::findDuplCharClassFrom(int32_t &baseCategory, int32_t &dup
uint16_t table_base;
uint16_t table_dupl;
for (; baseCategory < numCols-1; ++baseCategory) {
for (; duplCategory < numCols; ++duplCategory) {
for (duplCategory=baseCategory+1; duplCategory < numCols; ++duplCategory) {
for (int32_t state=0; state<numStates; state++) {
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
table_base = (uint16_t)sd->fDtran->elementAti(baseCategory);

View file

@ -4461,9 +4461,24 @@ void RBBITest::TestBug12677() {
void RBBITest::TestTableRedundancies() {
UErrorCode status = U_ZERO_ERROR;
UnicodeString rules {u"$s0=[;,*]; \n"
"$s1=[a-z]; \n"
"$s2=[i-n]; \n"
"$s3=[x-z]; \n"
"!!forward; \n"
"($s0 | '?')*; \n"
"($s1 | $s2 | $s3)*; \n" };
RuleBasedBreakIterator *lbi =
(RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
rules = lbi->getRules();
UParseError pe {};
RuleBasedBreakIterator *bi =
(RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
// bi->dumpTables();
// (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
new RuleBasedBreakIterator(rules, pe, status);
assertSuccess(WHERE, status);
if (U_FAILURE(status)) return;
bi->dumpTables();
RBBIDataWrapper *dw = bi->fData;
const RBBIStateTable *fwtbl = dw->fForwardTable;

View file

@ -39,18 +39,16 @@
# Temporary debugging tests
<locale en>
<word>
<data><0>コンピューター<400>は<400>、<0>本質<400>的<400>に<400>は<400>数字<400>しか<400>扱う<400>こと<400>が<400>でき<400>ま<400>せん<400>。<0>\
コンピューター<400>は<400>、<0>文字<400>や<400>記号<400>など<400>の<400>それぞれに<400>番号<400>を<400>割り振る<400>こと<400>によって<400>扱える<400>\
よう<400>にし<400>ます<400>。<0>ユニ<400>コード<400>が<400>出来る<400>まで<400>は<400>、<0>これらの<400>番号<400>を<400>割り振る<400>仕組み<400>が<400>\
何<400>百<400>種類<400>も<400>存在<400>しま<400>した<400>。<0>どの<400>一つ<400>を<400>とっても<400>、<0>十分<400>な<400>文字<400>を<400>含<400>\
んで<400>は<400>いま<400>せん<400>で<400>した<400>。<0>例えば<400>、<0>欧州<400>連合<400>一つ<400>を<400>見<400>て<400>も<400>、<0>その<400>\
すべて<400>の<400>言語<400>を<400>カバー<400>する<400>ため<400>に<400>は<400>、<0>いくつか<400>の<400>異なる<400>符号<400>化<400>の<400>仕組み<400>\
が<400>必要<400>で<400>した<400>。<0>英語<400>の<400>よう<400>な<400>一つ<400>の<400>言語<400>に<400>限<400>って<400>も<400>、<0>一つ<400>だけ<400>\
の<400>符号<400>化<400>の<400>仕組み<400>では<400>、<0>一般<400>的<400>に<400>使<400>われる<400>すべて<400>の<400>文字<400>、<0>句読点<400>、<0>\
。<0></data>
#<data><0>コンピューター<400>は<400>、<0>本質<400>的<400>に<400>は<400>数字<400>しか<400>扱う<400>こと<400>が<400>でき<400>ま<400>せん<400>。<0>\
<rules>
# Small hand-written rule set used with the <data> line that follows.
# Character sets: [i-n] ($s2) and [x-z] ($s3) are both subsets of [a-z] ($s1),
# so the three-way alternation below lists overlapping sets.
# NOTE(review): the "($s0 | '?')*" line has no terminating ';', so it appears to
# run on into the next line as a single rule. The copy of these rules in
# RBBITest::TestTableRedundancies ends that line with ';' - confirm which is intended.
$s0=[;,*];
$s1=[a-z];
$s2=[i-n];
$s3=[x-z];
!!forward;
($s0 | '?')*
($s1 | $s2 | $s3)*;
</rules>
<data>•hello• •</data>
## FILTERED BREAK TESTS