ICU-2093 intltest rbbitest, remove dependency on regexp

X-SVN-Rev: 11990
This commit is contained in:
Andy Heninger 2003-05-17 02:07:52 +00:00
parent 8f263fc235
commit 49cba9e944
2 changed files with 56 additions and 15 deletions

View file

@ -20,13 +20,12 @@
#include "unicode/utf16.h"
#include "unicode/ucnv.h"
#include "unicode/schriter.h"
#include "unicode/regex.h"
#include "intltest.h"
#include "rbbitst.h"
#include <string.h>
#include "uvector.h"
#include "uvectr32.h"
#include "charstr.h"
#include <string.h>
#include <stdio.h>
@ -2235,13 +2234,26 @@ void RBBITest::TestExtended() {
// Open and read the test data file.
//
const char *testDataDirectory = loadTestData(status);
UnicodeString tdd(testDataDirectory);
// TODO: Remove regexp dependency
tdd = RegexMatcher("([/\\\\])out[/\\\\]testdata", tdd, 0, status).
replaceFirst("$1rbbitst.txt", status);
char testFileName[1000];
if (strlen(testDataDirectory) >= sizeof(testFileName)) {
errln("Can't open test data. Path too long.");
return;
}
strcpy(testFileName, testDataDirectory);
char *p = strstr(testFileName, "/out/testdata");
if (p == NULL) {
p = strstr(testFileName, "\\out\\testdata");
if (p == NULL) {
errln("Can't open test data. Bad test data directory path..");
return;
}
}
strcpy(p+1, "rbbitst.txt");
int len;
UChar *testFile = ReadAndConvertFile((const char *)CharString(tdd), len, status);
UChar *testFile = ReadAndConvertFile(testFileName, len, status);
//
// Put the test data into a UnicodeString

View file

@ -18,15 +18,47 @@
# \ at end of line -> Line Continuation. Remove both the backslash and the new line
#
#
########################################################################################
#
#
# G r a p h e m e C l u s t e r T e s t s
#
#
##########################################################################################
<char>
<data><>a•b<>c<>d<>,<>\u0666<></data>
<data>•a•b•c•d•,•\u0666•</data>
<char>
<data>•W•r•i•t•e• •h•i•n•d•i• •h•e•r•e•.• •भ••ा•\u0930•\u0924• •\u0938\u0941\u0902•\u0926•\u0930•</data> <data>•\u0939•\u094c•\u0964•</data>
<data>•a\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304•</data>
#
# Bug 1587. Tamil. \u0baa\u0bc1 should be two separate characters, even though
# Hyangmi would prefer that it be one.
#
<data>•\u0baa•\u0bc1•\u0baa•\u0bc1•</data>
########################################################################################
#
#
# W o r d B o u n d a r y T e s t s
#
#
##########################################################################################
<word>
<data>•hello<200> •there<200> •goodbye<200></data>
<data>•word<200> •two<200> •</data>
<data>•hello<200> •12345<100> •,•</data>
<char>
<data><>a•b<>c<>d<>,<>\u0666<></data>
<data>•a•b•c•d•,•\u0666•</data>
#
# Test data originally in RBBIAPITest::TestFirstNextFollowing() and TestLastPreviousPreceding()
@ -35,9 +67,6 @@
<word>
<data>•This<200> •is<200> •a<200> •word<200> •break<200>.• •Isn't<200> •it<200>?• •2.25<100></data>
<char>
<data>•W•r•i•t•e• •h•i•n•d•i• •h•e•r•e•.• •भ••ा•\u0930•\u0924• •\u0938\u0941\u0902•\u0926•\u0930•</data> <data>•\u0939•\u094c•\u0964•</data>
<data>•a\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304•</data>
<sent>
<data>•This\n•</data>