mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-18 19:25:49 +00:00
ICU-13194 RBBI safe tables, added another test.
X-SVN-Rev: 41157
This commit is contained in:
parent
b1b0be93ea
commit
e5ab76b130
2 changed files with 90 additions and 5 deletions
|
@ -17,6 +17,7 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "unicode/brkiter.h"
|
||||
|
@ -111,6 +112,7 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
|
|||
TESTCASE_AUTO(TestBug12677);
|
||||
TESTCASE_AUTO(TestTableRedundancies);
|
||||
TESTCASE_AUTO(TestBug13447);
|
||||
TESTCASE_AUTO(TestReverse);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
@ -1817,7 +1819,7 @@ int32_t RBBICharMonkey::next(int32_t prevPos) {
|
|||
// a break if there are three or more contiguous RIs. If there are
|
||||
// only two, a break following will occur via other rules, and will include
|
||||
// any trailing extend characters, which is needed behavior.
|
||||
if (fRegionalIndicatorSet->contains(c0) && fRegionalIndicatorSet->contains(c1)
|
||||
if (fRegionalIndicatorSet->contains(c0) && fRegionalIndicatorSet->contains(c1)
|
||||
&& fRegionalIndicatorSet->contains(c2)) {
|
||||
break;
|
||||
}
|
||||
|
@ -3121,11 +3123,11 @@ int32_t RBBILineMonkey::next(int32_t startPos) {
|
|||
// LB 23a Do not break between numeric prefixes and ideographs, or between ideographs and numeric postfixes.
|
||||
// PR x (ID | EB | EM)
|
||||
// (ID | EB | EM) x PO
|
||||
if (fPR->contains(prevChar) &&
|
||||
if (fPR->contains(prevChar) &&
|
||||
(fID->contains(thisChar) || fEB->contains(thisChar) || fEM->contains(thisChar))) {
|
||||
continue;
|
||||
}
|
||||
if ((fID->contains(prevChar) || fEB->contains(prevChar) || fEM->contains(prevChar)) &&
|
||||
if ((fID->contains(prevChar) || fEB->contains(prevChar) || fEM->contains(prevChar)) &&
|
||||
fPO->contains(thisChar)) {
|
||||
continue;
|
||||
}
|
||||
|
@ -4422,7 +4424,7 @@ void RBBITest::TestBug12519() {
|
|||
return;
|
||||
}
|
||||
assertTrue(WHERE, Locale::getEnglish() == biEn->getLocale(ULOC_VALID_LOCALE, status));
|
||||
|
||||
|
||||
assertTrue(WHERE, Locale::getFrench() == biFr->getLocale(ULOC_VALID_LOCALE, status));
|
||||
assertTrue(WHERE "Locales do not participate in BreakIterator equality.", *biEn == *biFr);
|
||||
|
||||
|
@ -4462,7 +4464,7 @@ void RBBITest::TestBug12677() {
|
|||
|
||||
void RBBITest::TestTableRedundancies() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
|
||||
LocalPointer<RuleBasedBreakIterator> bi (
|
||||
(RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status));
|
||||
assertSuccess(WHERE, status);
|
||||
|
@ -4538,6 +4540,85 @@ void RBBITest::TestBug13447() {
|
|||
assertEquals(WHERE, UBRK_WORD_NUMBER, bi->getRuleStatus());
|
||||
}
|
||||
|
||||
// TestReverse exercises both the synthesized safe reverse rules and the logic
|
||||
// for filling the break iterator cache when starting from random positions
|
||||
// in the text.
|
||||
//
|
||||
// It's a monkey test, working on random data, with the expected data obtained
|
||||
// from forward iteration (no safe rules involved), comparing with results
|
||||
// when indexing into the interior of the string (safe rules needed).
|
||||
|
||||
void RBBITest::TestReverse() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
TestReverse(std::unique_ptr<RuleBasedBreakIterator>((RuleBasedBreakIterator *)
|
||||
BreakIterator::createCharacterInstance(Locale::getEnglish(), status)));
|
||||
assertSuccess(WHERE, status);
|
||||
TestReverse(std::unique_ptr<RuleBasedBreakIterator>((RuleBasedBreakIterator *)
|
||||
BreakIterator::createWordInstance(Locale::getEnglish(), status)));
|
||||
assertSuccess(WHERE, status);
|
||||
TestReverse(std::unique_ptr<RuleBasedBreakIterator>((RuleBasedBreakIterator *)
|
||||
BreakIterator::createLineInstance(Locale::getEnglish(), status)));
|
||||
assertSuccess(WHERE, status);
|
||||
TestReverse(std::unique_ptr<RuleBasedBreakIterator>((RuleBasedBreakIterator *)
|
||||
BreakIterator::createSentenceInstance(Locale::getEnglish(), status)));
|
||||
assertSuccess(WHERE, status);
|
||||
}
|
||||
|
||||
void RBBITest::TestReverse(std::unique_ptr<RuleBasedBreakIterator>bi) {
|
||||
if (!bi) {
|
||||
errln(WHERE);
|
||||
return;
|
||||
}
|
||||
|
||||
// From the mapping trie in the break iterator's internal data, create a
|
||||
// vector of UnicodeStrings, one for each character category, containing
|
||||
// all of the code points that map to that category. Unicode planes 0 and 1 only,
|
||||
// to avoid an execess of unassigned code points.
|
||||
|
||||
RBBIDataWrapper *data = bi->fData;
|
||||
int32_t categoryCount = data->fHeader->fCatCount;
|
||||
UTrie2 *trie = data->fTrie;
|
||||
|
||||
std::vector<UnicodeString> strings(categoryCount, UnicodeString());
|
||||
for (int cp=0; cp<0x1fff0; ++cp) {
|
||||
int cat = utrie2_get32(trie, cp);
|
||||
cat &= ~0x4000; // And off the dictionary bit from the category.
|
||||
assertTrue(WHERE, cat < categoryCount && cat >= 0);
|
||||
if (cat < 0 || cat >= categoryCount) return;
|
||||
strings[cat].append(cp);
|
||||
}
|
||||
|
||||
icu_rand randomGen;
|
||||
const int testStringLength = 10000;
|
||||
UnicodeString testString;
|
||||
|
||||
for (int i=0; i<testStringLength; ++i) {
|
||||
int charClass = randomGen() % categoryCount;
|
||||
if (strings[charClass].length() > 0) {
|
||||
int cp = strings[charClass].char32At(randomGen() % strings[charClass].length());
|
||||
testString.append(cp);
|
||||
}
|
||||
}
|
||||
|
||||
typedef std::pair<UBool, int32_t> Result;
|
||||
std::vector<Result> expectedResults;
|
||||
bi->setText(testString);
|
||||
for (int i=0; i<testString.length(); ++i) {
|
||||
bool isboundary = bi->isBoundary(i);
|
||||
int ruleStatus = bi->getRuleStatus();
|
||||
expectedResults.push_back(std::make_pair(isboundary, ruleStatus));
|
||||
}
|
||||
|
||||
for (int i=testString.length()-1; i>=0; --i) {
|
||||
bi->setText(testString); // clears the internal break cache
|
||||
Result expected = expectedResults[i];
|
||||
assertEquals(WHERE, expected.first, bi->isBoundary(i));
|
||||
assertEquals(WHERE, expected.second, bi->getRuleStatus());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// TestDebug - A place-holder test for debugging purposes.
|
||||
// For putting in fragments of other tests that can be invoked
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "intltest.h"
|
||||
#include "unicode/brkiter.h"
|
||||
#include "unicode/rbbi.h"
|
||||
|
@ -77,6 +79,8 @@ public:
|
|||
void TestBug12677();
|
||||
void TestTableRedundancies();
|
||||
void TestBug13447();
|
||||
void TestReverse();
|
||||
void TestReverse(std::unique_ptr<RuleBasedBreakIterator>bi);
|
||||
|
||||
void TestDebug();
|
||||
void TestProperties();
|
||||
|
|
Loading…
Add table
Reference in a new issue