ICU-1749 ported TestStrings TestStringPatterns and edited exhaustive test to check iterator and round trip

X-SVN-Rev: 8525
This commit is contained in:
Alan Liu 2002-04-26 06:28:15 +00:00
parent daea6a76cb
commit 2edd9a085e
2 changed files with 219 additions and 3 deletions

View file

@ -13,6 +13,7 @@
#include "usettest.h"
#include "unicode/uniset.h"
#include "unicode/unicode.h"
#include "unicode/usetiter.h"
UnicodeString operator+(const UnicodeString& left, const UnicodeSet& set) {
@ -47,6 +48,8 @@ UnicodeSetTest::runIndexedTest(int32_t index, UBool exec,
CASE(9,TestExhaustive);
CASE(10,TestToPattern);
CASE(11,TestIndexOf);
CASE(12,TestStrings);
CASE(13,TestStringPatterns);
default: name = ""; break;
}
}
@ -502,6 +505,87 @@ void UnicodeSetTest::TestAPI() {
}
}
void UnicodeSetTest::TestStrings() {
UErrorCode ec = U_ZERO_ERROR;
UnicodeSet* testList[] = {
UnicodeSet::createFromAll("abc"),
new UnicodeSet("[a-c]", ec),
&(UnicodeSet::createFrom("ch")->add('a','z').add("ll")),
new UnicodeSet("[{ll}{ch}a-z]", ec),
UnicodeSet::createFrom("ab}c"),
new UnicodeSet("[{ab\\}c}]", ec),
&((new UnicodeSet('a','z'))->add('A', 'Z').retain('M','m').complement('X')),
new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]", ec),
NULL
};
if (U_FAILURE(ec)) {
errln("FAIL: couldn't construct test sets");
}
for (int32_t i = 0; testList[i] != NULL; i+=2) {
if (U_SUCCESS(ec)) {
UnicodeString pat0, pat1;
testList[i]->toPattern(pat0, TRUE);
testList[i+1]->toPattern(pat1, TRUE);
if (*testList[i] == *testList[i+1]) {
logln((UnicodeString)"Ok: " + pat0 + " == " + pat1);
} else {
logln((UnicodeString)"FAIL: " + pat0 + " != " + pat1);
}
}
delete testList[i];
delete testList[i+1];
}
}
static char* NOT = "%%%%";
/**
* Test pattern behavior of multicharacter strings.
*/
void UnicodeSetTest::TestStringPatterns() {
UErrorCode ec = U_ZERO_ERROR;
UnicodeSet* s = new UnicodeSet("[a-z {aa} {ab}]", ec);
// This loop isn't a loop. It's here to make the compiler happy.
// If you're curious, try removing it and changing the 'break'
// statements (except for the last) to goto's.
for (;;) {
if (U_FAILURE(ec)) break;
char* exp1[] = {"aa", "ab", NOT, "ac", NULL};
expectToPattern(*s, "[a-z{aa}{ab}]", exp1);
s->add("ac");
char* exp2[] = {"aa", "ab", "ac", NOT, "xy", NULL};
expectToPattern(*s, "[a-z{aa}{ab}{ac}]", exp2);
s->applyPattern("[a-z {\\{l} {r\\}}]", ec);
if (U_FAILURE(ec)) break;
char* exp3[] = {"{l", "r}", NOT, "xy", NULL};
expectToPattern(*s, "[a-z{\\{l}{r\\}}]", exp3);
s->add("[]");
char* exp4[] = {"{l", "r}", "[]", NOT, "xy", NULL};
expectToPattern(*s, "[a-z{\\[\\]}{r\\}}{\\{l}]", exp4);
s->applyPattern("[a-z {\\u4E01\\u4E02}{\\n\\r}]", ec);
if (U_FAILURE(ec)) break;
char* exp5[] = {"\\u4E01\\u4E02", "\n\r", NULL};
expectToPattern(*s, "[a-z{\\u4E01\\u4E02}{\\n\\r}]", exp5);
break;
}
if (U_FAILURE(ec)) errln("FAIL: pattern parse error");
delete s;
}
/**
* Test the [:Latin:] syntax.
*/
@ -581,12 +665,16 @@ void UnicodeSetTest::TestExhaustive() {
int32_t limit = 128;
UnicodeSet x, y, z;
UnicodeSet x, y, z, aa;
for (int32_t i = 0; i < limit; ++i) {
bitsToSet(i, x);
logln((UnicodeString)"Testing " + i + ", " + x);
_testComplement(i, x, y);
// AS LONG AS WE ARE HERE, check roundtrip
checkRoundTrip(bitsToSet(i, aa));
for (int32_t j = 0; j < limit; ++j) {
_testAdd(i,j, x,y,z);
_testXor(i,j, x,y,z);
@ -695,13 +783,14 @@ void UnicodeSetTest::checkCanonicalRep(const UnicodeSet& set, const UnicodeStrin
/**
* Convert a bitmask to a UnicodeSet.
*/
void UnicodeSetTest::bitsToSet(int32_t a, UnicodeSet& result) {
UnicodeSet& UnicodeSetTest::bitsToSet(int32_t a, UnicodeSet& result) {
result.clear();
for (UChar32 i = 0; i < 32; ++i) {
if ((a & (1<<i)) != 0) {
result.add(i);
}
}
return result;
}
/**
@ -737,6 +826,85 @@ UnicodeString UnicodeSetTest::getPairs(const UnicodeSet& set) {
return pairs;
}
/**
* Basic consistency check for a few items.
* That the iterator works, and that we can create a pattern and
* get the same thing back
*/
void UnicodeSetTest::checkRoundTrip(const UnicodeSet& s) {
UErrorCode ec = U_ZERO_ERROR;
UnicodeSet t(s);
checkEqual(s, t, "copy ct");
t = s;
checkEqual(s, t, "operator=");
copyWithIterator(t, s, FALSE);
checkEqual(s, t, "iterator roundtrip");
copyWithIterator(t, s, TRUE); // try range
checkEqual(s, t, "iterator roundtrip");
UnicodeString pat; s.toPattern(pat, FALSE);
t.applyPattern(pat, ec);
if (U_FAILURE(ec)) {
errln("FAIL: applyPattern");
return;
} else {
checkEqual(s, t, "toPattern(false)");
}
s.toPattern(pat, TRUE);
t.applyPattern(pat, ec);
if (U_FAILURE(ec)) {
errln("FAIL: applyPattern");
return;
} else {
checkEqual(s, t, "toPattern(true)");
}
}
void UnicodeSetTest::copyWithIterator(UnicodeSet& t, const UnicodeSet& s, UBool withRange) {
t.clear();
UnicodeSetIterator it(s);
if (withRange) {
while (it.nextRange()) {
if (it.codepoint == UnicodeSetIterator::IS_STRING) {
t.add(*it.string);
} else {
t.add(it.codepoint, it.codepointEnd);
}
}
} else {
while (it.next()) {
if (it.codepoint == UnicodeSetIterator::IS_STRING) {
t.add(*it.string);
} else {
t.add(it.codepoint);
}
}
}
}
UBool UnicodeSetTest::checkEqual(const UnicodeSet& s, const UnicodeSet& t, const char* message) {
UnicodeString source; s.toPattern(source, TRUE);
UnicodeString result; t.toPattern(result, TRUE);
if (s != t) {
errln((UnicodeString)"FAIL: " + message
+ "; source = " + source
+ "; result = " + result
);
return FALSE;
} else {
logln((UnicodeString)"Ok: " + message
+ "; source = " + source
+ "; result = " + result
);
}
return TRUE;
}
void
UnicodeSetTest::expectContainment(const UnicodeSet& set,
const UnicodeString& charsIn,
@ -835,6 +1003,36 @@ UnicodeSetTest::expectPairs(const UnicodeSet& set, const UnicodeString& expected
}
}
void UnicodeSetTest::expectToPattern(const UnicodeSet& set,
const UnicodeString& expPat,
char** expStrings) {
UnicodeString pat; set.toPattern(pat, true);
if (pat == expPat) {
logln((UnicodeString)"Ok: toPattern() => \"" + pat + "\"");
} else {
errln((UnicodeString)"FAIL: toPattern() => \"" + pat + "\", expected \"" + expPat + "\"");
return;
}
UBool in = TRUE;
for (int32_t i=0; expStrings[i] != NULL; ++i) {
if (expStrings[i] == NOT) { // sic; pointer comparison
in = FALSE;
continue;
}
UnicodeString s = CharsToUnicodeString(expStrings[i]);
UBool contained = set.contains(s);
if (contained == in) {
logln((UnicodeString)"Ok: " + expPat +
(contained ? " contains {" : " does not contain {") +
escape(expStrings[i]) + "}");
} else {
errln((UnicodeString)"FAIL: " + expPat +
(contained ? " contains {" : " does not contain {") +
escape(expStrings[i]) + "}");
}
}
}
static UChar toHexString(int32_t i) { return (UChar)(i + (i < 10 ? 0x30 : (0x41 - 10))); }
void

View file

@ -45,6 +45,10 @@ private:
void TestAPI(void);
void TestStrings(void);
void TestStringPatterns(void);
void TestScriptSet(void);
/**
@ -86,7 +90,7 @@ private:
/**
* Convert a bitmask to a UnicodeSet.
*/
static void bitsToSet(int32_t a, UnicodeSet&);
static UnicodeSet& bitsToSet(int32_t a, UnicodeSet&);
/**
* Convert a UnicodeSet to a bitmask. Only the characters
@ -101,6 +105,17 @@ private:
*/
static UnicodeString getPairs(const UnicodeSet& set);
/**
* Basic consistency check for a few items.
* That the iterator works, and that we can create a pattern and
* get the same thing back
*/
void checkRoundTrip(const UnicodeSet& s);
void copyWithIterator(UnicodeSet& t, const UnicodeSet& s, UBool withRange);
UBool checkEqual(const UnicodeSet& s, const UnicodeSet& t, const char* message);
void expectContainment(const UnicodeSet& set,
const UnicodeString& charsIn,
const UnicodeString& charsOut);
@ -113,6 +128,9 @@ private:
const UnicodeString& expectedPairs);
void expectPairs(const UnicodeSet& set,
const UnicodeString& expectedPairs);
void expectToPattern(const UnicodeSet& set,
const UnicodeString& expPat,
char** expStrings);
void doAssert(UBool, const char*);
public:
static UnicodeString escape(const UnicodeString& s);