mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-15 09:45:26 +00:00
ICU-2093 intltest rbbitest, remove dependency on regexp
X-SVN-Rev: 11990
This commit is contained in:
parent
8f263fc235
commit
49cba9e944
2 changed files with 56 additions and 15 deletions
|
@ -20,13 +20,12 @@
|
|||
#include "unicode/utf16.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/schriter.h"
|
||||
#include "unicode/regex.h"
|
||||
#include "intltest.h"
|
||||
#include "rbbitst.h"
|
||||
#include <string.h>
|
||||
#include "uvector.h"
|
||||
#include "uvectr32.h"
|
||||
#include "charstr.h"
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
|
@ -2235,13 +2234,26 @@ void RBBITest::TestExtended() {
|
|||
// Open and read the test data file.
|
||||
//
|
||||
const char *testDataDirectory = loadTestData(status);
|
||||
UnicodeString tdd(testDataDirectory);
|
||||
// TODO: Remove regexp dependency
|
||||
tdd = RegexMatcher("([/\\\\])out[/\\\\]testdata", tdd, 0, status).
|
||||
replaceFirst("$1rbbitst.txt", status);
|
||||
|
||||
char testFileName[1000];
|
||||
if (strlen(testDataDirectory) >= sizeof(testFileName)) {
|
||||
errln("Can't open test data. Path too long.");
|
||||
return;
|
||||
}
|
||||
strcpy(testFileName, testDataDirectory);
|
||||
char *p = strstr(testFileName, "/out/testdata");
|
||||
if (p == NULL) {
|
||||
p = strstr(testFileName, "\\out\\testdata");
|
||||
if (p == NULL) {
|
||||
errln("Can't open test data. Bad test data directory path..");
|
||||
return;
|
||||
}
|
||||
}
|
||||
strcpy(p+1, "rbbitst.txt");
|
||||
|
||||
int len;
|
||||
UChar *testFile = ReadAndConvertFile((const char *)CharString(tdd), len, status);
|
||||
UChar *testFile = ReadAndConvertFile(testFileName, len, status);
|
||||
|
||||
|
||||
|
||||
//
|
||||
// Put the test data into a UnicodeString
|
||||
|
|
43
icu4c/source/test/testdata/rbbitst.txt
vendored
43
icu4c/source/test/testdata/rbbitst.txt
vendored
|
@ -18,15 +18,47 @@
|
|||
# \ at end of line -> Line Continuation. Remove both the backslash and the new line
|
||||
#
|
||||
#
|
||||
|
||||
|
||||
|
||||
########################################################################################
|
||||
#
|
||||
#
|
||||
# G r a p h e m e C l u s t e r T e s t s
|
||||
#
|
||||
#
|
||||
##########################################################################################
|
||||
<char>
|
||||
<data><>a•b<>c<>d<>,<>\u0666<></data>
|
||||
<data>•a•b•c•d•,•\u0666•</data>
|
||||
|
||||
|
||||
<char>
|
||||
<data>•W•r•i•t•e• •h•i•n•d•i• •h•e•r•e•.• •भ••ा•\u0930•\u0924• •\u0938\u0941\u0902•\u0926•\u0930•</data> <data>•\u0939•\u094c•\u0964•</data>
|
||||
<data>•a\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304•</data>
|
||||
|
||||
|
||||
#
|
||||
# Bug 1587. Tamil. \u0baa\u0bc1 should be two separate characters, even though
|
||||
# Hyangmi would prefer that it be one.
|
||||
#
|
||||
<data>•\u0baa•\u0bc1•\u0baa•\u0bc1•</data>
|
||||
|
||||
########################################################################################
|
||||
#
|
||||
#
|
||||
# W o r d B o u n d a r y T e s t s
|
||||
#
|
||||
#
|
||||
##########################################################################################
|
||||
|
||||
|
||||
|
||||
<word>
|
||||
<data>•hello<200> •there<200> •goodbye<200></data>
|
||||
<data>•word<200> •two<200> •</data>
|
||||
<data>•hello<200> •12345<100> •,•</data>
|
||||
|
||||
<char>
|
||||
<data><>a•b<>c<>d<>,<>\u0666<></data>
|
||||
<data>•a•b•c•d•,•\u0666•</data>
|
||||
|
||||
|
||||
#
|
||||
# Test data originally in RBBIAPITest::TestFirstNextFollowing() and TestLastPreviousPreceding()
|
||||
|
@ -35,9 +67,6 @@
|
|||
<word>
|
||||
<data>•This<200> •is<200> •a<200> •word<200> •break<200>.• •Isn't<200> •it<200>?• •2.25<100></data>
|
||||
|
||||
<char>
|
||||
<data>•W•r•i•t•e• •h•i•n•d•i• •h•e•r•e•.• •भ••ा•\u0930•\u0924• •\u0938\u0941\u0902•\u0926•\u0930•</data> <data>•\u0939•\u094c•\u0964•</data>
|
||||
<data>•a\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304\u0301\u0302\u0303\u0304•</data>
|
||||
|
||||
<sent>
|
||||
<data>•This\n•</data>
|
||||
|
|
Loading…
Add table
Reference in a new issue