mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-96 merging Ram's test with cintltst. Exchanging private decompose for u_normalize, moving some constants
X-SVN-Rev: 4192
This commit is contained in:
parent
26d19f7d22
commit
a4dce67c57
4 changed files with 259 additions and 148 deletions
|
@ -838,9 +838,10 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st
|
|||
|
||||
/* produce canonical closure */
|
||||
for(u = 0; u < 0x10000; u++) {
|
||||
/*if((noOfDec = unorm_normalize((const UChar *)&u, 1, UNORM_NFD, 0, decomp, 256, status)) > 1
|
||||
|| (noOfDec == 1 && *decomp != (UChar)u))*/
|
||||
if((noOfDec = uprv_ucol_decompose ((UChar)u, decomp)) > 1 || (noOfDec == 1 && *decomp != (UChar)u)) {
|
||||
if((noOfDec = unorm_normalize((const UChar *)&u, 1, UNORM_NFD, 0, decomp, 256, status)) > 1
|
||||
|| (noOfDec == 1 && *decomp != (UChar)u))
|
||||
/*if((noOfDec = uprv_ucol_decompose ((UChar)u, decomp)) > 1 || (noOfDec == 1 && *decomp != (UChar)u))*/
|
||||
{
|
||||
compRes = ucol_getDynamicCEs(src, t, (UChar *)&u, 1, compCE, 256, status);
|
||||
el.noOfCEs = ucol_getDynamicCEs(src, t, decomp, noOfDec, el.CEs, 128, status);
|
||||
|
||||
|
|
|
@ -309,9 +309,6 @@ UBool ucol_uprv_tok_readAndSetOption(UCATableHeader *image, const UChar* start,
|
|||
}
|
||||
}
|
||||
|
||||
#define UCOL_TOK_UNSET 0xFFFFFFFF
|
||||
#define UCOL_TOK_RESET 0xDEADBEEF
|
||||
|
||||
const UChar *ucol_tok_parseNextToken(UColTokenParser *src,
|
||||
uint32_t *strength,
|
||||
uint32_t *chOffset, uint32_t *chLen,
|
||||
|
|
|
@ -23,6 +23,9 @@
|
|||
|
||||
#include "ucol_imp.h"
|
||||
|
||||
#define UCOL_TOK_UNSET 0xFFFFFFFF
|
||||
#define UCOL_TOK_RESET 0xDEADBEEF
|
||||
|
||||
#define UCOL_TOK_POLARITY_NEGATIVE 0
|
||||
#define UCOL_TOK_POLARITY_POSITIVE 1
|
||||
|
||||
|
|
|
@ -26,6 +26,8 @@
|
|||
#include "unicode/ustring.h"
|
||||
#include "string.h"
|
||||
#include "ucol_imp.h"
|
||||
#include "ucol_tok.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
static UCollator *myCollation;
|
||||
const static UChar rules[MAX_TOKEN_LEN] =
|
||||
|
@ -535,6 +537,254 @@ static void PrintMarkDavis( )
|
|||
}
|
||||
}
|
||||
|
||||
static void CollationLocaleTest( ) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UResourceBundle *rb = ures_open(NULL, "fr_FR_you_ll_never_find_this_locale", &status);
|
||||
const char *rbLocale = ures_getLocale(rb, &status);
|
||||
|
||||
|
||||
UResourceBundle *coll = ures_getByKey(rb, "CollationElements", NULL, &status);
|
||||
const char *locale = ures_getLocale(coll, &status);
|
||||
|
||||
}
|
||||
|
||||
/*
 * Checks that p sorts before q, and that this still holds when a differing
 * leading character is put in front of each string.
 *
 * col  collator handed through to doTest
 * p    NUL-terminated string expected to sort first
 * q    NUL-terminated string expected to sort second
 *
 * Results are reported by doTest (defined elsewhere in the test suite).
 */
void testPrimary(UCollator* col, const UChar* p,const UChar* q){
    UChar source[256] = { '\0'};
    UChar target[256] = { '\0'};
    /* Room left for the appended string: capacity minus one prefix unit and the NUL. */
    const int32_t room = (int32_t)(sizeof(source)/sizeof(source[0])) - 2;

    /* The two strings on their own. */
    doTest(col, p, q, UCOL_LESS);

    /*
     * U+00E0 + p against U+0061 + q: the difference between p and q must
     * still decide the order ("primary swamps 2nd").
     * u_strncat bounds the copy so an over-long p or q cannot overrun the buffer.
     */
    source[0] = 0x00E0;
    u_strncat(source, p, room);
    target[0] = 0x0061;
    u_strncat(target, q, room);
    doTest(col, source, target, UCOL_LESS);
}
|
||||
|
||||
/*
 * Checks that p sorts before q, that the order survives a differing leading
 * character, and that a differing trailing letter reverses it.
 *
 * col  collator handed through to doTest
 * p    NUL-terminated string expected to sort first
 * q    NUL-terminated string expected to sort second
 *
 * Results are reported by doTest (defined elsewhere in the test suite).
 */
void testSecondary(UCollator* col, const UChar* p,const UChar* q){
    /*
     * Real UTF-16 strings for the suffixes. Casting a char literal such as
     * "b" to UChar* reads the bytes {'b', 0} as one 16-bit unit (value depends
     * on byte order) and then runs past the end of the literal.
     */
    static const UChar suffixB[] = { 0x0062, 0 };
    static const UChar suffixA[] = { 0x0061, 0 };

    UChar source[256] = { '\0'};
    UChar target[256] = { '\0'};
    /* Room left for p/q: capacity minus one extra unit and the NUL. */
    const int32_t room = (int32_t)(sizeof(source)/sizeof(source[0])) - 2;

    /* The two strings on their own. */
    doTest(col, p, q, UCOL_LESS);

    /* U+0041 + p against U+0061 + q ("secondary swamps 3rd"). */
    source[0] = 0x0041;
    u_strncat(source, p, room);
    target[0]= 0x0061;
    u_strncat(target, q, room);
    doTest(col, source, target, UCOL_LESS);

    /* p + "b" against q + "a": the trailing letters now decide the order. */
    source[0] = '\0';
    u_strncat(source, p, room);
    u_strcat(source, suffixB);
    target[0] = '\0';
    u_strncat(target, q, room);
    u_strcat(target, suffixA);
    doTest(col, source, target, UCOL_GREATER);
}
|
||||
|
||||
/*
 * Checks that p sorts before q, that the order survives a differing leading
 * character, and that a differing trailing character reverses it.
 *
 * col  collator handed through to doTest
 * p    NUL-terminated string expected to sort first
 * q    NUL-terminated string expected to sort second
 *
 * Results are reported by doTest (defined elsewhere in the test suite).
 */
void testTertiary(UCollator* col, const UChar* p,const UChar* q){
    /*
     * Real UTF-16 strings for the suffixes. Casting the char literal "a" to
     * UChar* reads the bytes {'a', 0} as one 16-bit unit (value depends on
     * byte order) and then runs past the end of the literal.
     */
    static const UChar suffixAGrave[] = { 0x00E0, 0 };
    static const UChar suffixA[] = { 0x0061, 0 };

    UChar source[256] = { '\0'};
    UChar target[256] = { '\0'};
    /* Room left for p/q: capacity minus one extra unit and the NUL. */
    const int32_t room = (int32_t)(sizeof(source)/sizeof(source[0])) - 2;

    /* The two strings on their own. */
    doTest(col, p, q, UCOL_LESS);

    /* U+0020 + p against U+002D + q ("tertiary swamps 4th"). */
    source[0] = 0x0020;
    u_strncat(source, p, room);
    target[0]= 0x002D;
    u_strncat(target, q, room);
    doTest(col, source, target, UCOL_LESS);

    /* p + U+00E0 against q + "a": the trailing characters now decide the order. */
    source[0] = '\0';
    u_strncat(source, p, room);
    u_strcat(source, suffixAGrave);
    target[0] = '\0';
    u_strncat(target, q, room);
    u_strcat(target, suffixA);
    doTest(col, source, target, UCOL_GREATER);
}
|
||||
/*
 * Checks that p and q compare as equal under col.
 *
 * col  collator handed through to doTest
 * p    first NUL-terminated string
 * q    second NUL-terminated string
 *
 * The result is reported by doTest (defined elsewhere in the test suite).
 */
void testEquality(UCollator* col, const UChar* p,const UChar* q){
    doTest(col, p, q, UCOL_EQUAL);
}
|
||||
|
||||
/*
 * Dispatches to the check that matches the given strength.
 *
 * col       collator to test
 * p, q      NUL-terminated strings, in rule order
 * strength  UCOL_IDENTICAL -> testEquality, UCOL_PRIMARY -> testPrimary,
 *           UCOL_SECONDARY -> testSecondary, UCOL_TERTIARY -> testTertiary;
 *           any other value is ignored.
 */
void testCollator(UCollator* col, const UChar* p,const UChar* q, uint32_t strength){
    switch(strength){
    case UCOL_IDENTICAL:
        testEquality(col,p,q);
        break;

    case UCOL_PRIMARY:
        testPrimary(col,p,q);
        break;

    case UCOL_SECONDARY:
        testSecondary(col,p,q);
        break;

    case UCOL_TERTIARY:
        testTertiary(col,p,q);
        break;

    default:
        /* Nothing to check for other strengths. */
        break;
    }
}
|
||||
|
||||
/*
 * Locales whose collation rules RamsRulesTest walks through.
 * Entries inside comments are deliberately left out of the run.
 * The table and the strings it points to are read-only.
 */
static const char *const localesToTest[] = {
    "ar", "bg", "ca", "cs", "da",
    "el", "en_BE", /*"en_US_POSIX", */
    "es", "et", "fi", "fr", "hi",
    "hr", "hu", "is", "iw", /*"ja", */
    /*"ko",*/ "lt", "lv", "mk", "mt",
    "nb", "nn", "nn_NO", "pl", "ro",
    "ru", "sh", "sk", "sl", "sq",
    "sr", "sv", "th", "tr", "uk",
    "vi", "zh", "zh_TW"
};
|
||||
|
||||
static void RamsRulesTest( ) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
uint32_t i = 0;
|
||||
UCollator *coll = NULL;
|
||||
const UChar *rules = NULL, *current = NULL;
|
||||
uint32_t ruleLen = 0;
|
||||
uint32_t strength = 0;
|
||||
uint32_t chOffset = 0; uint32_t chLen = 0;
|
||||
uint32_t exOffset = 0; uint32_t exLen = 0;
|
||||
UBool varT = FALSE; UBool top_ = TRUE;
|
||||
UBool startOfRules = TRUE;
|
||||
UColTokenParser src;
|
||||
UCATableHeader img;
|
||||
|
||||
UChar first[256];
|
||||
UChar second[256];
|
||||
UChar *rulesCopy = NULL;
|
||||
|
||||
src.image = &img;
|
||||
|
||||
|
||||
|
||||
for(i = 0; i<sizeof(localesToTest)/sizeof(localesToTest[0]); i++) {
|
||||
coll = ucol_open(localesToTest[i], &status);
|
||||
fprintf(stderr, "%s\n", localesToTest[i]);
|
||||
rules = ucol_getRules(coll, &ruleLen);
|
||||
if(U_SUCCESS(status) && ruleLen > 0) {
|
||||
rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
|
||||
uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
|
||||
src.source = src.current = rulesCopy;
|
||||
src.end = rulesCopy+ruleLen;
|
||||
src.extraCurrent = src.end;
|
||||
src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
|
||||
*first = *second = 0;
|
||||
|
||||
while ((current = ucol_tok_parseNextToken(&src, &strength,
|
||||
&chOffset, &chLen, &exOffset, &exLen,
|
||||
&varT, &top_, startOfRules, &status)) != NULL) {
|
||||
startOfRules = FALSE;
|
||||
|
||||
u_strncpy(second,rulesCopy+chOffset, chLen);
|
||||
if(exLen > 0) {
|
||||
u_strncat(second+chLen, rulesCopy+exOffset, exLen);
|
||||
}
|
||||
second[chLen+exLen] = 0;
|
||||
if(strength != UCOL_TOK_RESET) {
|
||||
testCollator(coll,first,second,strength);
|
||||
}
|
||||
u_strcpy(first, second);
|
||||
|
||||
}
|
||||
uprv_free(rulesCopy);
|
||||
}
|
||||
ucol_close(coll);
|
||||
}
|
||||
}
|
||||
|
||||
void addMiscCollTest(TestNode** root)
|
||||
{
|
||||
|
@ -542,6 +792,8 @@ void addMiscCollTest(TestNode** root)
|
|||
addTest(root, &IncompleteCntTest, "tscoll/cmsccoll/IncompleteCntTest");
|
||||
addTest(root, &BlackBirdTest, "tscoll/cmsccoll/BlackBirdTest");
|
||||
addTest(root, &FunkyATest, "tscoll/cmsccoll/FunkyATest");
|
||||
addTest(root, &CollationLocaleTest, "tscoll/cmsccoll/CollationLocaleTest");
|
||||
/*addTest(root, &RamsRulesTest, "tscoll/cmsccoll/RamsRulesTest");*/
|
||||
/*addTest(root, &PrintMarkDavis, "tscoll/cmsccoll/PrintMarkDavis");*/
|
||||
}
|
||||
|
||||
|
@ -633,148 +885,6 @@ char *aescstrdup(const UChar* unichars, char* buf,int len){
|
|||
*target = '\0';
|
||||
return newString;
|
||||
}
|
||||
/*
 * Checks that p collates before q, and that this still holds when a
 * differing leading character is put in front of each string.
 * A failed expectation is written to the global stream `file`.
 *
 * col  collator under test
 * p    NUL-terminated string expected to sort first
 * q    NUL-terminated string expected to sort second
 */
void testPrimary(UCollator* col, const UChar* p,const UChar* q){
    UChar source[256] = { '\0'};
    UChar target[256] = { '\0'};
    /* Plain char buffers: aescstrdup takes char*, and %s expects char*. */
    char utfSource[256] = {'\0'};
    char utfTarget[256] = {'\0'};
    /* Room left for p/q: capacity minus one prefix unit and the NUL. */
    const int32_t room = (int32_t)(sizeof(source)/sizeof(source[0])) - 2;
    UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));

    if(result!=UCOL_LESS){
        aescstrdup(p,utfSource,256);
        aescstrdup(q,utfTarget,256);
        fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
    }

    /* U+00E0 + p against U+0061 + q; the append is bounded to the buffer size. */
    source[0] = 0x00E0;
    u_strncat(source, p, room);
    target[0] = 0x0061;
    u_strncat(target, q, room);
    result = ucol_strcoll(col,source,u_strlen(source),target,u_strlen(target));
    if(result!=UCOL_LESS){
        aescstrdup(source,utfSource,256);
        aescstrdup(target,utfTarget,256);
        fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSource,utfTarget);
    }
}
|
||||
|
||||
/*
 * Checks that p collates before q, that the order survives a differing
 * leading character, and that a differing trailing letter reverses it.
 * A failed expectation is written to the global stream `file`.
 *
 * col  collator under test
 * p    NUL-terminated string expected to sort first
 * q    NUL-terminated string expected to sort second
 */
void testSecondary(UCollator* col, const UChar* p,const UChar* q){
    /*
     * Real UTF-16 strings for the suffixes. Casting a char literal such as
     * "b" to UChar* reads the bytes {'b', 0} as one 16-bit unit (value depends
     * on byte order) and then runs past the end of the literal.
     */
    static const UChar suffixB[] = { 0x0062, 0 };
    static const UChar suffixA[] = { 0x0061, 0 };

    UChar source[256] = { '\0'};
    UChar target[256] = { '\0'};
    /* Plain char buffers: aescstrdup takes char*, and %s expects char*. */
    char utfSource[256] = {'\0'};
    char utfTarget[256] = {'\0'};
    /* Room left for p/q: capacity minus one extra unit and the NUL. */
    const int32_t room = (int32_t)(sizeof(source)/sizeof(source[0])) - 2;

    UCollationResult result= ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));

    if(result!=UCOL_LESS){
        aescstrdup(p,utfSource,256);
        aescstrdup(q,utfTarget,256);
        fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarget);
    }

    /* U+0041 + p against U+0061 + q. */
    source[0] = 0x0041;
    u_strncat(source, p, room);
    target[0]= 0x0061;
    u_strncat(target, q, room);
    result = ucol_strcoll(col,source,u_strlen(source),target,u_strlen(target));
    if(result!=UCOL_LESS){
        aescstrdup(source,utfSource,256);
        aescstrdup(target,utfTarget,256);
        fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSource,utfTarget);
    }

    /* p + "b" against q + "a": the trailing letters now decide the order. */
    source[0] = '\0';
    u_strncat(source, p, room);
    u_strcat(source, suffixB);
    target[0] = '\0';
    u_strncat(target, q, room);
    u_strcat(target, suffixA);
    result = ucol_strcoll(col,source,u_strlen(source),target,u_strlen(target));
    if(result!=UCOL_GREATER){
        aescstrdup(source,utfSource,256);
        aescstrdup(target,utfTarget,256);
        fprintf(file,"secondary is swamped by 1 failed source: %s target: %s \n",utfSource,utfTarget);
    }
}
|
||||
|
||||
/*
 * Checks that p collates before q, that the order survives a differing
 * leading character, and that a differing trailing character reverses it.
 * A failed expectation is written to the global stream `file`.
 *
 * col  collator under test
 * p    NUL-terminated string expected to sort first
 * q    NUL-terminated string expected to sort second
 */
void testTertiary(UCollator* col, const UChar* p,const UChar* q){
    /*
     * Real UTF-16 strings for the suffixes. Casting the char literal "a" to
     * UChar* reads the bytes {'a', 0} as one 16-bit unit (value depends on
     * byte order) and then runs past the end of the literal.
     */
    static const UChar suffixAGrave[] = { 0x00E0, 0 };
    static const UChar suffixA[] = { 0x0061, 0 };

    UChar source[256] = { '\0'};
    UChar target[256] = { '\0'};
    /* Plain char buffers: aescstrdup takes char*, and %s expects char*. */
    char utfSource[256] = {'\0'};
    char utfTarget[256] = {'\0'};
    /* Room left for p/q: capacity minus one extra unit and the NUL. */
    const int32_t room = (int32_t)(sizeof(source)/sizeof(source[0])) - 2;
    UCollationResult result= ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));

    if(result!=UCOL_LESS){
        aescstrdup(p,utfSource,256);
        aescstrdup(q,utfTarget,256);
        fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget);
    }

    /* U+0020 + p against U+002D + q. */
    source[0] = 0x0020;
    u_strncat(source, p, room);
    target[0]= 0x002D;
    u_strncat(target, q, room);
    result = ucol_strcoll(col,source,u_strlen(source),target,u_strlen(target));
    if(result!=UCOL_LESS){
        aescstrdup(source,utfSource,256);
        aescstrdup(target,utfTarget,256);
        fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSource,utfTarget);
    }

    /* p + U+00E0 against q + "a": the trailing characters now decide the order. */
    source[0] = '\0';
    u_strncat(source, p, room);
    u_strcat(source, suffixAGrave);
    target[0] = '\0';
    u_strncat(target, q, room);
    u_strcat(target, suffixA);
    result = ucol_strcoll(col,source,u_strlen(source),target,u_strlen(target));
    if(result!=UCOL_GREATER){
        aescstrdup(source,utfSource,256);
        aescstrdup(target,utfTarget,256);
        fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",utfSource,utfTarget);
    }
}
|
||||
/*
 * Checks that p and q collate as equal under col.
 * A failed expectation is written to the global stream `file`.
 *
 * col  collator under test
 * p    first NUL-terminated string
 * q    second NUL-terminated string
 */
void testEquality(UCollator* col, const UChar* p,const UChar* q){
    /* Plain char buffers: aescstrdup takes char*, and %s expects char*. */
    char utfSource[256] = {'\0'};
    char utfTarget[256] = {'\0'};
    UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));

    if(result!=UCOL_EQUAL){
        aescstrdup(p,utfSource,256);
        aescstrdup(q,utfTarget,256);
        /* Name the check that actually failed (the message used to say "Primary"). */
        fprintf(file,"Equality failed source: %s target: %s \n", utfSource,utfTarget);
    }
}
|
||||
|
||||
/*
 * Dispatches to the check that matches the given numeric strength.
 *
 * col        collator to test
 * p, q       NUL-terminated strings, in rule order
 * delimiter  not used by this function; kept so existing callers still compile
 * strength   0 -> testEquality, 1 -> testPrimary, 2 -> testSecondary,
 *            3 -> testTertiary; any other value is ignored.
 */
void testCollator(UCollator* col, const UChar* p,const UChar* q, UChar* delimiter,int strength){
    (void)delimiter;

    switch(strength){
    case 0:
        testEquality(col,p,q);
        break;

    case 1:
        testPrimary(col,p,q);
        break;

    case 2:
        testSecondary(col,p,q);
        break;

    case 3:
        testTertiary(col,p,q);
        break;

    default:
        /* Nothing to check for other strengths. */
        break;
    }
}
|
||||
/*ar bg ca cs da el en_BE en_US_POSIX es et fi fr hi hr hu is iw ja ko lt lv mk mt nb nn nn_NO pl ro ru sh sk sl sq sr sv th tr uk vi zh zh_TW*/
|
||||
UChar* consumeDelimiter(UChar** source, int srcLen,int* strength, UChar** delimiter){
|
||||
UChar* local = *source;
|
||||
UBool foundDelimiter = FALSE;
|
||||
|
|
Loading…
Add table
Reference in a new issue