mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 13:35:32 +00:00
ICU-11886 Speed up ucol_open by 21%
* implement a deserialize constructor for UnicodeSet * add test cases for same * add a generator (`gencolusb`) which can build `source/i18n/collunsafe.h` * Note that for bootstrapping `collunsafe.h` can be truncated (zero length). X-SVN-Rev: 37929
This commit is contained in:
parent
b5fe5f98c4
commit
94fe10c710
12 changed files with 636 additions and 7 deletions
1
.gitattributes
vendored
1
.gitattributes
vendored
|
@ -150,6 +150,7 @@ icu4c/source/tools/genccode/genccode.vcxproj -text
|
|||
icu4c/source/tools/gencfu/gencfu.vcxproj -text
|
||||
icu4c/source/tools/gencmn/gencmn.vcxproj -text
|
||||
icu4c/source/tools/gencnval/gencnval.vcxproj -text
|
||||
icu4c/source/tools/gencolusb/README.md -text
|
||||
icu4c/source/tools/gendict/gendict.vcxproj -text
|
||||
icu4c/source/tools/gennorm2/gennorm2.vcxproj -text
|
||||
icu4c/source/tools/genrb/derb.vcxproj -text
|
||||
|
|
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -715,6 +715,9 @@ icu4c/source/tools/gencnval/gencnval.vcproj.*.*.user
|
|||
icu4c/source/tools/gencnval/release
|
||||
icu4c/source/tools/gencnval/x64
|
||||
icu4c/source/tools/gencnval/x86
|
||||
icu4c/source/tools/gencolusb/Makefile.local
|
||||
icu4c/source/tools/gencolusb/extract_unsafe_backwards
|
||||
icu4c/source/tools/gencolusb/verify_uset
|
||||
icu4c/source/tools/gendict/*.1
|
||||
icu4c/source/tools/gendict/*.d
|
||||
icu4c/source/tools/gendict/*.o
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
***************************************************************************
|
||||
* Copyright (C) 1999-2014, International Business Machines Corporation
|
||||
* Copyright (C) 1999-2015, International Business Machines Corporation
|
||||
* and others. All Rights Reserved.
|
||||
***************************************************************************
|
||||
* Date Name Description
|
||||
|
@ -369,6 +369,29 @@ public:
|
|||
*/
|
||||
UnicodeSet(UChar32 start, UChar32 end);
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
enum ESerialization {
|
||||
kSerialized /* result of serialize() */
|
||||
};
|
||||
|
||||
/**
|
||||
* Constructs a set from the output of serialize().
|
||||
* The resulting set is not frozen by this constructor; call freeze() if an immutable set is required.
|
||||
*
|
||||
* @param buffer the 16 bit array
|
||||
* @param bufferLen the original length returned from serialize()
|
||||
* @param serialization the value 'kSerialized'
|
||||
* @param status error code
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
UnicodeSet(const uint16_t buffer[], int32_t bufferLen,
|
||||
ESerialization serialization, UErrorCode &status);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Constructs a set from the given pattern. See the class
|
||||
* description for the syntax of the pattern language.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2012, International Business Machines
|
||||
* Copyright (C) 1999-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
|
@ -1468,6 +1468,82 @@ UnicodeSet& UnicodeSet::compact() {
|
|||
return *this;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_SERIALIZE
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Deserialize constructor.
|
||||
* Result will be frozen.
|
||||
*/
|
||||
UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization serialization, UErrorCode &ec)
|
||||
: len(1), capacity(1+START_EXTRA), list(0), bmpSet(0), buffer(0),
|
||||
bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
|
||||
fFlags(0) {
|
||||
|
||||
if(U_FAILURE(ec)) {
|
||||
setToBogus();
|
||||
return;
|
||||
}
|
||||
|
||||
if( (serialization != kSerialized)
|
||||
|| (data==NULL)
|
||||
|| (dataLen < 1)) {
|
||||
ec = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
setToBogus();
|
||||
return;
|
||||
}
|
||||
|
||||
allocateStrings(ec);
|
||||
if (U_FAILURE(ec)) {
|
||||
setToBogus();
|
||||
return;
|
||||
}
|
||||
|
||||
// bmp?
|
||||
int32_t headerSize = ((data[0]&0x8000)) ?2:1;
|
||||
int32_t bmpLength = (headerSize==1)?data[0]:data[1];
|
||||
|
||||
len = (((data[0]&0x7FFF)-bmpLength)/2)+bmpLength;
|
||||
#ifdef DEBUG_SERIALIZE
|
||||
printf("dataLen %d headerSize %d bmpLen %d len %d. data[0]=%X/%X/%X/%X\n", dataLen,headerSize,bmpLength,len, data[0],data[1],data[2],data[3]);
|
||||
#endif
|
||||
capacity = len+1;
|
||||
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
|
||||
if(!list || U_FAILURE(ec)) {
|
||||
setToBogus();
|
||||
return;
|
||||
}
|
||||
// copy bmp
|
||||
int32_t i;
|
||||
for(i = 0; i< bmpLength;i++) {
|
||||
list[i] = data[i+headerSize];
|
||||
#ifdef DEBUG_SERIALIZE
|
||||
printf("<<16@%d[%d] %X\n", i+headerSize, i, list[i]);
|
||||
#endif
|
||||
}
|
||||
/*
|
||||
if(bmpLength>0) {
|
||||
bmpSet= new BMPSet(list, bmpLength);
|
||||
if(bmpSet == NULL) {
|
||||
ec = U_MEMORY_ALLOCATION_ERROR;
|
||||
setToBogus();
|
||||
return;
|
||||
}
|
||||
}*/
|
||||
// copy smp
|
||||
for(i=bmpLength;i<len;i++) {
|
||||
list[i] = ((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+0] << 16) +
|
||||
((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+1]);
|
||||
#ifdef DEBUG_SERIALIZE
|
||||
printf("<<32@%d+[%d] %lX\n", headerSize+bmpLength+i, i, list[i]);
|
||||
#endif
|
||||
}
|
||||
// terminator
|
||||
list[len++]=UNICODESET_HIGH;
|
||||
}
|
||||
|
||||
|
||||
int32_t UnicodeSet::serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const {
|
||||
int32_t bmpLength, length, destLength;
|
||||
|
||||
|
@ -1506,7 +1582,9 @@ int32_t UnicodeSet::serialize(uint16_t *dest, int32_t destCapacity, UErrorCode&
|
|||
for (bmpLength=0; bmpLength<length && this->list[bmpLength]<=0xffff; ++bmpLength) {}
|
||||
length=bmpLength+2*(length-bmpLength);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_SERIALIZE
|
||||
printf(">> bmpLength%d length%d len%d\n", bmpLength, length, len);
|
||||
#endif
|
||||
/* length: number of 16-bit array units */
|
||||
if (length>0x7fff) {
|
||||
/* there are only 15 bits for the length in the first serialized word */
|
||||
|
@ -1525,6 +1603,9 @@ int32_t UnicodeSet::serialize(uint16_t *dest, int32_t destCapacity, UErrorCode&
|
|||
const UChar32 *p;
|
||||
int32_t i;
|
||||
|
||||
#ifdef DEBUG_SERIALIZE
|
||||
printf("writeHdr\n");
|
||||
#endif
|
||||
*dest=(uint16_t)length;
|
||||
if (length>bmpLength) {
|
||||
*dest|=0x8000;
|
||||
|
@ -1535,11 +1616,17 @@ int32_t UnicodeSet::serialize(uint16_t *dest, int32_t destCapacity, UErrorCode&
|
|||
/* write the BMP part of the array */
|
||||
p=this->list;
|
||||
for (i=0; i<bmpLength; ++i) {
|
||||
#ifdef DEBUG_SERIALIZE
|
||||
printf("writebmp: %x\n", (int)*p);
|
||||
#endif
|
||||
*dest++=(uint16_t)*p++;
|
||||
}
|
||||
|
||||
/* write the supplementary part of the array */
|
||||
for (; i<length; i+=2) {
|
||||
#ifdef DEBUG_SERIALIZE
|
||||
printf("write32: %x\n", (int)*p);
|
||||
#endif
|
||||
*dest++=(uint16_t)(*p>>16);
|
||||
*dest++=(uint16_t)*p++;
|
||||
}
|
||||
|
|
|
@ -30,6 +30,13 @@
|
|||
#include "ucmndata.h"
|
||||
#include "utrie2.h"
|
||||
|
||||
// #if U_HAVE_COLLUNSAFE
|
||||
#if 1
|
||||
#include "collunsafe.h"
|
||||
|
||||
#endif
|
||||
// #end
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
namespace {
|
||||
|
@ -262,6 +269,28 @@ CollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes
|
|||
return;
|
||||
}
|
||||
if(baseData == NULL) {
|
||||
#if defined( COLLUNSAFE_COLL_VERSION )
|
||||
#if defined(COLLUNSAFE_RANGES) /* slower but still an improvement*/
|
||||
tailoring.unsafeBackwardSet = new UnicodeSet();
|
||||
|
||||
for(int32_t i=0;i<unsafe_rangeCount*2;i+=2) {
|
||||
tailoring.unsafeBackwardSet->add(unsafe_ranges[i+0],unsafe_ranges[i+1]);
|
||||
}
|
||||
tailoring.unsafeBackwardSet->freeze();
|
||||
#elif defined (COLLUNSAFE_SERIALIZE)
|
||||
/* faster */
|
||||
tailoring.unsafeBackwardSet = new UnicodeSet(unsafe_serializedData, unsafe_serializedCount, UnicodeSet::kSerialized, errorCode);
|
||||
if(tailoring.unsafeBackwardSet == NULL) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
} else if (U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
#else
|
||||
#error no unsafe-backwards strategy chosen
|
||||
#endif
|
||||
|
||||
#else
|
||||
// Create the unsafe-backward set for the root collator.
|
||||
// Include all non-zero combining marks and trail surrogates.
|
||||
// We do this at load time, rather than at build time,
|
||||
|
@ -279,6 +308,7 @@ CollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes
|
|||
return;
|
||||
}
|
||||
data->nfcImpl.addLcccChars(*tailoring.unsafeBackwardSet);
|
||||
#endif
|
||||
} else {
|
||||
// Clone the root collator's set contents.
|
||||
tailoring.unsafeBackwardSet = static_cast<UnicodeSet *>(
|
||||
|
|
122
icu4c/source/i18n/collunsafe.h
Normal file
122
icu4c/source/i18n/collunsafe.h
Normal file
|
@ -0,0 +1,122 @@
|
|||
// collunsafe.h
|
||||
// Copyright (C) 2015, International Business Machines Corporation and others. All Rights Reserved.
|
||||
|
||||
// To be included by collationdatareader.cpp, and generated by gencolusb.
|
||||
// Machine generated, do not edit.
|
||||
|
||||
#ifndef COLLUNSAFE_H
|
||||
#define COLLUNSAFE_H
|
||||
|
||||
#define COLLUNSAFE_ICU_VERSION "56.0.1"
|
||||
#define COLLUNSAFE_COLL_VERSION "9.64"
|
||||
#define COLLUNSAFE_SERIALIZE 1
|
||||
static const int32_t unsafe_serializedCount = 850;
|
||||
static const uint16_t unsafe_serializedData[850] = {
|
||||
0x8350, 0x01B8, 0x0034, 0x0035, 0x004C, 0x004D, 0x00A0, 0x00A1, // 8
|
||||
0x0300, 0x034F, 0x0350, 0x0370, 0x03A9, 0x03AA, 0x03E2, 0x03E3, // 16
|
||||
0x042F, 0x0430, 0x0483, 0x0488, 0x0531, 0x0532, 0x0591, 0x05BE, // 24
|
||||
0x05BF, 0x05C0, 0x05C1, 0x05C3, 0x05C4, 0x05C6, 0x05C7, 0x05C8, // 32
|
||||
0x05D0, 0x05D1, 0x0610, 0x061B, 0x0628, 0x0629, 0x064B, 0x0660, // 40
|
||||
0x0670, 0x0671, 0x06D6, 0x06DD, 0x06DF, 0x06E5, 0x06E7, 0x06E9, // 48
|
||||
0x06EA, 0x06EE, 0x0710, 0x0712, 0x0730, 0x074B, 0x078C, 0x078D, // 56
|
||||
0x07D8, 0x07D9, 0x07EB, 0x07F4, 0x0800, 0x0801, 0x0816, 0x081A, // 64
|
||||
0x081B, 0x0824, 0x0825, 0x0828, 0x0829, 0x082E, 0x0840, 0x0841, // 72
|
||||
0x0859, 0x085C, 0x08E3, 0x0900, 0x0905, 0x0906, 0x093C, 0x093D, // 80
|
||||
0x094D, 0x094E, 0x0951, 0x0955, 0x0995, 0x0996, 0x09BC, 0x09BD, // 88
|
||||
0x09BE, 0x09BF, 0x09CD, 0x09CE, 0x09D7, 0x09D8, 0x0A15, 0x0A16, // 96
|
||||
0x0A3C, 0x0A3D, 0x0A4D, 0x0A4E, 0x0A95, 0x0A96, 0x0ABC, 0x0ABD, // 104
|
||||
0x0ACD, 0x0ACE, 0x0B15, 0x0B16, 0x0B3C, 0x0B3D, 0x0B3E, 0x0B3F, // 112
|
||||
0x0B4D, 0x0B4E, 0x0B56, 0x0B58, 0x0B95, 0x0B96, 0x0BBE, 0x0BBF, // 120
|
||||
0x0BCD, 0x0BCE, 0x0BD7, 0x0BD8, 0x0C15, 0x0C16, 0x0C4D, 0x0C4E, // 128
|
||||
0x0C55, 0x0C57, 0x0C95, 0x0C96, 0x0CBC, 0x0CBD, 0x0CC2, 0x0CC3, // 136
|
||||
0x0CCD, 0x0CCE, 0x0CD5, 0x0CD7, 0x0D15, 0x0D16, 0x0D3E, 0x0D3F, // 144
|
||||
0x0D4D, 0x0D4E, 0x0D57, 0x0D58, 0x0D85, 0x0D86, 0x0DCA, 0x0DCB, // 152
|
||||
0x0DCF, 0x0DD0, 0x0DDF, 0x0DE0, 0x0E01, 0x0E2F, 0x0E32, 0x0E33, // 160
|
||||
0x0E38, 0x0E3B, 0x0E48, 0x0E4C, 0x0E81, 0x0E83, 0x0E84, 0x0E85, // 168
|
||||
0x0E87, 0x0E89, 0x0E8A, 0x0E8B, 0x0E8D, 0x0E8E, 0x0E94, 0x0E98, // 176
|
||||
0x0E99, 0x0EA0, 0x0EA1, 0x0EA4, 0x0EA5, 0x0EA6, 0x0EA7, 0x0EA8, // 184
|
||||
0x0EAA, 0x0EAC, 0x0EAD, 0x0EAF, 0x0EB2, 0x0EB3, 0x0EB8, 0x0EBA, // 192
|
||||
0x0EC8, 0x0ECC, 0x0EDC, 0x0EE0, 0x0F18, 0x0F1A, 0x0F35, 0x0F36, // 200
|
||||
0x0F37, 0x0F38, 0x0F39, 0x0F3A, 0x0F40, 0x0F41, 0x0F71, 0x0F76, // 208
|
||||
0x0F7A, 0x0F7E, 0x0F80, 0x0F85, 0x0F86, 0x0F88, 0x0FC6, 0x0FC7, // 216
|
||||
0x1000, 0x1001, 0x102E, 0x102F, 0x1037, 0x1038, 0x1039, 0x103B, // 224
|
||||
0x108D, 0x108E, 0x10D3, 0x10D4, 0x12A0, 0x12A1, 0x135D, 0x1360, // 232
|
||||
0x13C4, 0x13C5, 0x14C0, 0x14C1, 0x168F, 0x1690, 0x16A0, 0x16A1, // 240
|
||||
0x1703, 0x1704, 0x1714, 0x1715, 0x1723, 0x1724, 0x1734, 0x1735, // 248
|
||||
0x1743, 0x1744, 0x1763, 0x1764, 0x1780, 0x1781, 0x17D2, 0x17D3, // 256
|
||||
0x17DD, 0x17DE, 0x1826, 0x1827, 0x18A9, 0x18AA, 0x1900, 0x1901, // 264
|
||||
0x1939, 0x193C, 0x1950, 0x1951, 0x1980, 0x19AC, 0x1A00, 0x1A01, // 272
|
||||
0x1A17, 0x1A19, 0x1A20, 0x1A21, 0x1A60, 0x1A61, 0x1A75, 0x1A7D, // 280
|
||||
0x1A7F, 0x1A80, 0x1AB0, 0x1ABE, 0x1B05, 0x1B06, 0x1B34, 0x1B36, // 288
|
||||
0x1B44, 0x1B45, 0x1B6B, 0x1B74, 0x1B83, 0x1B84, 0x1BAA, 0x1BAC, // 296
|
||||
0x1BC0, 0x1BC1, 0x1BE6, 0x1BE7, 0x1BF2, 0x1BF4, 0x1C00, 0x1C01, // 304
|
||||
0x1C37, 0x1C38, 0x1C5A, 0x1C5B, 0x1CD0, 0x1CD3, 0x1CD4, 0x1CE1, // 312
|
||||
0x1CE2, 0x1CE9, 0x1CED, 0x1CEE, 0x1CF4, 0x1CF5, 0x1CF8, 0x1CFA, // 320
|
||||
0x1DC0, 0x1DF6, 0x1DFC, 0x1E00, 0x201C, 0x201D, 0x20AC, 0x20AD, // 328
|
||||
0x20D0, 0x20DD, 0x20E1, 0x20E2, 0x20E5, 0x20F1, 0x263A, 0x263B, // 336
|
||||
0x2C00, 0x2C01, 0x2CEF, 0x2CF2, 0x2D5E, 0x2D5F, 0x2D7F, 0x2D80, // 344
|
||||
0x2DE0, 0x2E00, 0x302A, 0x3030, 0x304B, 0x304C, 0x3099, 0x309B, // 352
|
||||
0x30AB, 0x30AC, 0x3105, 0x3106, 0x5B57, 0x5B58, 0xA288, 0xA289, // 360
|
||||
0xA4E8, 0xA4E9, 0xA549, 0xA54A, 0xA66F, 0xA670, 0xA674, 0xA67E, // 368
|
||||
0xA69E, 0xA6A1, 0xA6F0, 0xA6F2, 0xA800, 0xA801, 0xA806, 0xA807, // 376
|
||||
0xA840, 0xA841, 0xA882, 0xA883, 0xA8C4, 0xA8C5, 0xA8E0, 0xA8F2, // 384
|
||||
0xA90A, 0xA90B, 0xA92B, 0xA92E, 0xA930, 0xA931, 0xA953, 0xA954, // 392
|
||||
0xA984, 0xA985, 0xA9B3, 0xA9B4, 0xA9C0, 0xA9C1, 0xAA00, 0xAA01, // 400
|
||||
0xAA80, 0xAAB1, 0xAAB2, 0xAAB5, 0xAAB7, 0xAAB9, 0xAABE, 0xAAC0, // 408
|
||||
0xAAC1, 0xAAC2, 0xAAF6, 0xAAF7, 0xABC0, 0xABC1, 0xABED, 0xABEE, // 416
|
||||
0xAC00, 0xAC01, 0xD800, 0xD807, 0xD808, 0xD809, 0xD80C, 0xD80D, // 424
|
||||
0xD811, 0xD812, 0xD81A, 0xD81C, 0xD82F, 0xD830, 0xD834, 0xD835, // 432
|
||||
0xD83A, 0xD83B, 0xDC00, 0xE000, 0xFB1E, 0xFB1F, 0xFDD0, 0xFDD1, // 440
|
||||
0xFE20, 0xFE30, 0x0001, 0x0000, 0x0001, 0x0001, 0x0001, 0x01FD, // 448
|
||||
0x0001, 0x01FE, 0x0001, 0x0280, 0x0001, 0x0281, 0x0001, 0x02B7, // 456
|
||||
0x0001, 0x02B8, 0x0001, 0x02E0, 0x0001, 0x02E1, 0x0001, 0x0308, // 464
|
||||
0x0001, 0x0309, 0x0001, 0x0330, 0x0001, 0x0331, 0x0001, 0x036B, // 472
|
||||
0x0001, 0x036C, 0x0001, 0x0376, 0x0001, 0x037B, 0x0001, 0x0380, // 480
|
||||
0x0001, 0x0381, 0x0001, 0x03A0, 0x0001, 0x03A1, 0x0001, 0x0414, // 488
|
||||
0x0001, 0x0415, 0x0001, 0x0450, 0x0001, 0x0451, 0x0001, 0x0480, // 496
|
||||
0x0001, 0x0481, 0x0001, 0x0500, 0x0001, 0x0501, 0x0001, 0x0537, // 504
|
||||
0x0001, 0x0538, 0x0001, 0x0647, 0x0001, 0x0648, 0x0001, 0x0800, // 512
|
||||
0x0001, 0x0801, 0x0001, 0x0840, 0x0001, 0x0841, 0x0001, 0x0873, // 520
|
||||
0x0001, 0x0874, 0x0001, 0x0896, 0x0001, 0x0897, 0x0001, 0x08F4, // 528
|
||||
0x0001, 0x08F5, 0x0001, 0x0900, 0x0001, 0x0901, 0x0001, 0x0920, // 536
|
||||
0x0001, 0x0921, 0x0001, 0x0980, 0x0001, 0x0981, 0x0001, 0x09A0, // 544
|
||||
0x0001, 0x09A1, 0x0001, 0x0A00, 0x0001, 0x0A01, 0x0001, 0x0A0D, // 552
|
||||
0x0001, 0x0A0E, 0x0001, 0x0A0F, 0x0001, 0x0A10, 0x0001, 0x0A38, // 560
|
||||
0x0001, 0x0A3B, 0x0001, 0x0A3F, 0x0001, 0x0A40, 0x0001, 0x0A60, // 568
|
||||
0x0001, 0x0A61, 0x0001, 0x0A95, 0x0001, 0x0A96, 0x0001, 0x0AC1, // 576
|
||||
0x0001, 0x0AC2, 0x0001, 0x0AE5, 0x0001, 0x0AE7, 0x0001, 0x0B00, // 584
|
||||
0x0001, 0x0B01, 0x0001, 0x0B40, 0x0001, 0x0B41, 0x0001, 0x0B60, // 592
|
||||
0x0001, 0x0B61, 0x0001, 0x0B8F, 0x0001, 0x0B90, 0x0001, 0x0C00, // 600
|
||||
0x0001, 0x0C01, 0x0001, 0x0CA1, 0x0001, 0x0CA2, 0x0001, 0x1005, // 608
|
||||
0x0001, 0x1006, 0x0001, 0x1046, 0x0001, 0x1047, 0x0001, 0x107F, // 616
|
||||
0x0001, 0x1080, 0x0001, 0x1083, 0x0001, 0x1084, 0x0001, 0x10B9, // 624
|
||||
0x0001, 0x10BB, 0x0001, 0x10D0, 0x0001, 0x10D1, 0x0001, 0x1100, // 632
|
||||
0x0001, 0x1104, 0x0001, 0x1127, 0x0001, 0x1128, 0x0001, 0x1133, // 640
|
||||
0x0001, 0x1135, 0x0001, 0x1152, 0x0001, 0x1153, 0x0001, 0x1173, // 648
|
||||
0x0001, 0x1174, 0x0001, 0x1183, 0x0001, 0x1184, 0x0001, 0x11C0, // 656
|
||||
0x0001, 0x11C1, 0x0001, 0x11CA, 0x0001, 0x11CB, 0x0001, 0x1208, // 664
|
||||
0x0001, 0x1209, 0x0001, 0x1235, 0x0001, 0x1237, 0x0001, 0x128F, // 672
|
||||
0x0001, 0x1290, 0x0001, 0x12BE, 0x0001, 0x12BF, 0x0001, 0x12E9, // 680
|
||||
0x0001, 0x12EB, 0x0001, 0x1315, 0x0001, 0x1316, 0x0001, 0x133C, // 688
|
||||
0x0001, 0x133D, 0x0001, 0x133E, 0x0001, 0x133F, 0x0001, 0x134D, // 696
|
||||
0x0001, 0x134E, 0x0001, 0x1357, 0x0001, 0x1358, 0x0001, 0x1366, // 704
|
||||
0x0001, 0x136D, 0x0001, 0x1370, 0x0001, 0x1375, 0x0001, 0x1484, // 712
|
||||
0x0001, 0x1485, 0x0001, 0x14B0, 0x0001, 0x14B1, 0x0001, 0x14BA, // 720
|
||||
0x0001, 0x14BB, 0x0001, 0x14BD, 0x0001, 0x14BE, 0x0001, 0x14C2, // 728
|
||||
0x0001, 0x14C4, 0x0001, 0x158E, 0x0001, 0x158F, 0x0001, 0x15AF, // 736
|
||||
0x0001, 0x15B0, 0x0001, 0x15BF, 0x0001, 0x15C1, 0x0001, 0x160E, // 744
|
||||
0x0001, 0x160F, 0x0001, 0x163F, 0x0001, 0x1640, 0x0001, 0x1680, // 752
|
||||
0x0001, 0x1681, 0x0001, 0x16B6, 0x0001, 0x16B8, 0x0001, 0x1717, // 760
|
||||
0x0001, 0x1718, 0x0001, 0x172B, 0x0001, 0x172C, 0x0001, 0x18B4, // 768
|
||||
0x0001, 0x18B5, 0x0001, 0x1AC0, 0x0001, 0x1AC1, 0x0001, 0x2000, // 776
|
||||
0x0001, 0x2001, 0x0001, 0x3153, 0x0001, 0x3154, 0x0001, 0x4400, // 784
|
||||
0x0001, 0x4401, 0x0001, 0x6A4F, 0x0001, 0x6A50, 0x0001, 0x6AE6, // 792
|
||||
0x0001, 0x6AE7, 0x0001, 0x6AF0, 0x0001, 0x6AF5, 0x0001, 0x6B1C, // 800
|
||||
0x0001, 0x6B1D, 0x0001, 0x6B30, 0x0001, 0x6B37, 0x0001, 0x6F00, // 808
|
||||
0x0001, 0x6F01, 0x0001, 0xBC20, 0x0001, 0xBC21, 0x0001, 0xBC9E, // 816
|
||||
0x0001, 0xBC9F, 0x0001, 0xD165, 0x0001, 0xD16A, 0x0001, 0xD16D, // 824
|
||||
0x0001, 0xD173, 0x0001, 0xD17B, 0x0001, 0xD183, 0x0001, 0xD185, // 832
|
||||
0x0001, 0xD18C, 0x0001, 0xD1AA, 0x0001, 0xD1AE, 0x0001, 0xD242, // 840
|
||||
0x0001, 0xD245, 0x0001, 0xE802, 0x0001, 0xE803, 0x0001, 0xE8D0, // 848
|
||||
0x0001, 0xE8D7};
|
||||
#endif
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
********************************************************************************
|
||||
* Copyright (C) 1999-2014 International Business Machines Corporation and
|
||||
* Copyright (C) 1999-2015 International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************************
|
||||
* Date Name Description
|
||||
|
@ -90,6 +90,7 @@ UnicodeSetTest::runIndexedTest(int32_t index, UBool exec,
|
|||
CASE(21,TestFreezable);
|
||||
CASE(22,TestSpan);
|
||||
CASE(23,TestStringSpan);
|
||||
CASE(24,TestUCAUnsafeBackwards);
|
||||
default: name = ""; break;
|
||||
}
|
||||
}
|
||||
|
@ -1714,6 +1715,12 @@ void UnicodeSetTest::TestSurrogate() {
|
|||
errln((UnicodeString)"FAIL: " + UnicodeString(DATA[i], -1, US_INV) + ".size() == " +
|
||||
set.size() + ", expected 4");
|
||||
}
|
||||
|
||||
{
|
||||
UErrorCode subErr = U_ZERO_ERROR;
|
||||
checkRoundTrip(set);
|
||||
checkSerializeRoundTrip(set, subErr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1730,8 +1737,12 @@ void UnicodeSetTest::TestExhaustive() {
|
|||
logln((UnicodeString)"Testing " + i + ", " + x);
|
||||
_testComplement(i, x, y);
|
||||
|
||||
UnicodeSet &toTest = bitsToSet(i, aa);
|
||||
|
||||
// AS LONG AS WE ARE HERE, check roundtrip
|
||||
checkRoundTrip(bitsToSet(i, aa));
|
||||
checkRoundTrip(toTest);
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
checkSerializeRoundTrip(toTest, ec);
|
||||
|
||||
for (int32_t j = 0; j < limit; ++j) {
|
||||
_testAdd(i,j, x,y,z);
|
||||
|
@ -1922,7 +1933,40 @@ void UnicodeSetTest::checkRoundTrip(const UnicodeSet& s) {
|
|||
checkEqual(s, t, "toPattern(true)");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void UnicodeSetTest::checkSerializeRoundTrip(const UnicodeSet& t, UErrorCode &status) {
|
||||
if(U_FAILURE(status)) return;
|
||||
int32_t len = t.serialize(serializeBuffer.getAlias(), serializeBuffer.getCapacity(), status);
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR) {
|
||||
status = U_ZERO_ERROR;
|
||||
serializeBuffer.resize(len);
|
||||
len = t.serialize(serializeBuffer.getAlias(), serializeBuffer.getCapacity(), status);
|
||||
// let 2nd error stand
|
||||
}
|
||||
if(U_FAILURE(status)) {
|
||||
errln("checkSerializeRoundTrip: error %s serializing buffer\n", u_errorName(status));
|
||||
return;
|
||||
}
|
||||
|
||||
#if 0
|
||||
UnicodeString pat; t.toPattern(pat, FALSE);
|
||||
infoln(pat);
|
||||
printf(" %d: ", len);
|
||||
for(int i=0;i<len;i++) {
|
||||
printf( " %04X ", serializeBuffer.getAlias()[i]);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
UnicodeSet deserialized(serializeBuffer.getAlias(), len, UnicodeSet::kSerialized, status);
|
||||
if(U_FAILURE(status)) {
|
||||
errln("checkSerializeRoundTrip: error %s deserializing buffer: buf %p len %d, original %d\n", u_errorName(status), serializeBuffer.getAlias(), len, t.getRangeCount());
|
||||
return;
|
||||
}
|
||||
|
||||
checkEqual(t, deserialized, "Set was unequal when deserialized");
|
||||
}
|
||||
|
||||
void UnicodeSetTest::copyWithIterator(UnicodeSet& t, const UnicodeSet& s, UBool withRange) {
|
||||
t.clear();
|
||||
UnicodeSetIterator it(s);
|
||||
|
@ -1946,6 +1990,8 @@ void UnicodeSetTest::copyWithIterator(UnicodeSet& t, const UnicodeSet& s, UBool
|
|||
}
|
||||
|
||||
UBool UnicodeSetTest::checkEqual(const UnicodeSet& s, const UnicodeSet& t, const char* message) {
|
||||
assertEquals(UnicodeString("RangeCount: ","") + message, s.getRangeCount(), t.getRangeCount());
|
||||
assertEquals(UnicodeString("size: ","") + message, s.size(), t.size());
|
||||
UnicodeString source; s.toPattern(source, TRUE);
|
||||
UnicodeString result; t.toPattern(result, TRUE);
|
||||
if (s != t) {
|
||||
|
@ -3812,3 +3858,29 @@ void UnicodeSetTest::TestStringSpan() {
|
|||
errln("FAIL: UnicodeSet(%s).spanBack(while longest match) returns the wrong value", pattern);
|
||||
}
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_COLLATION
|
||||
#include "collationroot.h"
|
||||
#include "collationtailoring.h"
|
||||
#endif
|
||||
|
||||
void UnicodeSetTest::TestUCAUnsafeBackwards() {
|
||||
#if !UCONFIG_NO_COLLATION
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
|
||||
// Get the unsafeBackwardsSet
|
||||
const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
errln("FAIL: %s getting root cache entry", u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
//const UVersionInfo &version = rootEntry->tailoring->version;
|
||||
const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet;
|
||||
|
||||
checkSerializeRoundTrip(*unsafeBackwardSet, errorCode);
|
||||
|
||||
if(!logKnownIssue("11891","UnicodeSet fails to round trip on CollationRoot...unsafeBackwards set")) {
|
||||
checkRoundTrip(*unsafeBackwardSet);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2007, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2015, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************
|
||||
**********************************************************************
|
||||
|
@ -18,6 +18,7 @@
|
|||
#include "unicode/uniset.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "intltest.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
class UnicodeSetWithStrings;
|
||||
|
||||
|
@ -87,6 +88,8 @@ private:
|
|||
|
||||
void TestStringSpan();
|
||||
|
||||
void TestUCAUnsafeBackwards();
|
||||
|
||||
private:
|
||||
|
||||
UBool toPatternAux(UChar32 start, UChar32 end);
|
||||
|
@ -136,6 +139,8 @@ private:
|
|||
* get the same thing back
|
||||
*/
|
||||
void checkRoundTrip(const UnicodeSet& s);
|
||||
|
||||
void checkSerializeRoundTrip(const UnicodeSet& s, UErrorCode &ec);
|
||||
|
||||
void copyWithIterator(UnicodeSet& t, const UnicodeSet& s, UBool withRange);
|
||||
|
||||
|
@ -183,6 +188,8 @@ private:
|
|||
|
||||
UConverter *utf8Cnv;
|
||||
|
||||
MaybeStackArray<uint16_t, 16> serializeBuffer;
|
||||
|
||||
public:
|
||||
static UnicodeString escape(const UnicodeString& s);
|
||||
};
|
||||
|
|
43
icu4c/source/tools/gencolusb/Makefile
Normal file
43
icu4c/source/tools/gencolusb/Makefile
Normal file
|
@ -0,0 +1,43 @@
|
|||
## Makefile for rebuilding 'unsafe backward' data
|
||||
## Copyright (c) 2015, International Business Machines Corporation and
|
||||
## others. All Rights Reserved.
|
||||
|
||||
##
|
||||
## CONFIGURATION:
|
||||
## 1. create Makefile.local containing overrides if necessary:
|
||||
## BUILD_ROOT=/home/user/icu-build (location of 'config.status' etc.)
|
||||
## PATH_VAR=DYLD_LIBRARY_PATH (if on OSX etc)
|
||||
##
|
||||
## UPDATING
|
||||
## 1. make 'reset-icu' will reset ICU to 'bootstrap' state, zeroing out source/i18n/collunsafe.h
|
||||
## 2. make 'gen-file' will generate and test source/i18n/collunsafe.h
|
||||
|
||||
subdir=tools/gencolusb
|
||||
srcdir=$(shell pwd)
|
||||
SOURCE_ROOT=$(shell cd ../.. ; pwd)
|
||||
BUILD_ROOT=$(SOURCE_ROOT)
|
||||
BUILD_HERE=$(BUILD_ROOT)/$(subdir)
|
||||
TOOL=extract_unsafe_backwards
|
||||
TEST=verify_uset
|
||||
PATH_VAR=LD_LIBRARY_PATH
|
||||
|
||||
-include Makefile.local
|
||||
|
||||
GEN_FILE=$(SOURCE_ROOT)/i18n/collunsafe.h
|
||||
BUILD_OPTS=-I$(SOURCE_ROOT)/common -I$(SOURCE_ROOT)/i18n -L$(BUILD_ROOT)/lib -licuuc -licui18n -licudata
|
||||
RUN_OPTS=env $(PATH_VAR)=$(BUILD_ROOT)/lib
|
||||
|
||||
reset-icu:
|
||||
>$(GEN_FILE)
|
||||
$(MAKE) -C $(BUILD_ROOT)/i18n
|
||||
|
||||
gen-file: reset-icu
|
||||
mkdir -p $(BUILD_HERE)
|
||||
$(CXX) -o $(BUILD_HERE)/$(TOOL) $(srcdir)/$(TOOL).cpp $(BUILD_OPTS)
|
||||
$(RUN_OPTS) $(BUILD_HERE)/$(TOOL) > $(GEN_FILE) || exit 1
|
||||
$(CXX) -o $(BUILD_HERE)/$(TEST) $(srcdir)/$(TEST).cpp $(BUILD_OPTS)
|
||||
$(RUN_OPTS) $(BUILD_HERE)/$(TEST) || exit 1
|
||||
$(MAKE) -C $(BUILD_ROOT)/i18n
|
||||
$(RUN_OPTS) $(BUILD_HERE)/$(TEST) || exit 1
|
||||
|
||||
.PHONY: reset-icu gen-file
|
9
icu4c/source/tools/gencolusb/README.md
Normal file
9
icu4c/source/tools/gencolusb/README.md
Normal file
|
@ -0,0 +1,9 @@
|
|||
Unsafe-Backward Collator Data
|
||||
===
|
||||
|
||||
This directory contains tools to build the `source/i18n/collunsafe.h`
|
||||
precomputed data.
|
||||
|
||||
See [Makefile](./Makefile) for more details.
|
||||
|
||||
* Copyright (c) 2015, International Business Machines Corporation and others. All Rights Reserved.
|
163
icu4c/source/tools/gencolusb/extract_unsafe_backwards.cpp
Normal file
163
icu4c/source/tools/gencolusb/extract_unsafe_backwards.cpp
Normal file
|
@ -0,0 +1,163 @@
|
|||
/**
|
||||
* Copyright (c) 1999-2015, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*
|
||||
* Generator for source/i18n/collunsafe.h
|
||||
* see Makefile
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "unicode/uversion.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "collationroot.h"
|
||||
#include "collationtailoring.h"
|
||||
|
||||
/**
|
||||
* Define the type of generator to use. Choose one.
|
||||
*/
|
||||
#define SERIALIZE 1 //< Default: use UnicodeSet.serialize() (best, fast, requires new UnicodeSet c'tor)
|
||||
#define RANGES 0 //< Enumerate ranges (works, not as fast)
|
||||
#define PATTERN 0 //< Generate a UnicodeSet pattern (broken AND probably slow)
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
|
||||
// Get the unsafeBackwardsSet
|
||||
const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "Err: %s getting root cache entry\n", u_errorName(errorCode));
|
||||
return 1;
|
||||
}
|
||||
const UVersionInfo &version = rootEntry->tailoring->version;
|
||||
const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet;
|
||||
char verString[20];
|
||||
u_versionToString(version, verString);
|
||||
fprintf(stderr, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION, verString);
|
||||
int32_t rangeCount = unsafeBackwardSet->getRangeCount();
|
||||
|
||||
#if SERIALIZE
|
||||
fprintf(stderr, ".. serializing\n");
|
||||
// UnicodeSet serialization
|
||||
|
||||
UErrorCode preflightCode = U_ZERO_ERROR;
|
||||
// preflight
|
||||
int32_t serializedCount = unsafeBackwardSet->serialize(NULL,0,preflightCode);
|
||||
if(U_FAILURE(preflightCode) && preflightCode != U_BUFFER_OVERFLOW_ERROR) {
|
||||
fprintf(stderr, "Err: %s preflighting unicode set\n", u_errorName(preflightCode));
|
||||
return 1;
|
||||
}
|
||||
uint16_t *serializedData = new uint16_t[serializedCount];
|
||||
// serialize
|
||||
unsafeBackwardSet->serialize(serializedData, serializedCount, errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
delete [] serializedData;
|
||||
fprintf(stderr, "Err: %s serializing unicodeset\n", u_errorName(errorCode));
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if PATTERN
|
||||
fprintf(stderr,".. pattern\n");
|
||||
// attempt to use pattern
|
||||
|
||||
UnicodeString pattern;
|
||||
UnicodeSet set(*unsafeBackwardSet);
|
||||
set.compact();
|
||||
set.toPattern(pattern, FALSE);
|
||||
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
// This fails (bug# ?) - which is why this method was abandoned.
|
||||
|
||||
// UnicodeSet usA(pattern, errorCode);
|
||||
// fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode));
|
||||
// return 1;
|
||||
}
|
||||
|
||||
|
||||
const UChar *buf = pattern.getBuffer();
|
||||
int32_t needed = pattern.length();
|
||||
|
||||
// print
|
||||
{
|
||||
char buf2[2048];
|
||||
int32_t len2 = pattern.extract(0, pattern.length(), buf2, "utf-8");
|
||||
buf2[len2]=0;
|
||||
fprintf(stderr,"===\n%s\n===\n", buf2);
|
||||
}
|
||||
|
||||
const UnicodeString unsafeBackwardPattern(FALSE, buf, needed);
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
//UnicodeSet us(unsafeBackwardPattern, errorCode);
|
||||
// fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode));
|
||||
} else {
|
||||
fprintf(stderr, "Uset OK - \n");
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// Generate the output file.
|
||||
|
||||
printf("// collunsafe.h\n");
|
||||
printf("// %s\n", U_COPYRIGHT_STRING);
|
||||
printf("\n");
|
||||
printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n");
|
||||
printf("// Machine generated, do not edit.\n");
|
||||
printf("\n");
|
||||
printf("#ifndef COLLUNSAFE_H\n"
|
||||
"#define COLLUNSAFE_H\n"
|
||||
"\n"
|
||||
"#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION "\"\n");
|
||||
printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString);
|
||||
|
||||
|
||||
|
||||
#if PATTERN
|
||||
printf("#define COLLUNSAFE_PATTERN 1\n");
|
||||
printf("static const int32_t collunsafe_len = %d;\n", needed);
|
||||
printf("static const UChar collunsafe_pattern[collunsafe_len] = {\n");
|
||||
for(int i=0;i<needed;i++) {
|
||||
if( (i>0) && (i%8 == 0) ) {
|
||||
printf(" // %d\n", i);
|
||||
}
|
||||
printf("0x%04X", buf[i]); // TODO check
|
||||
if(i != (needed-1)) {
|
||||
printf(", ");
|
||||
}
|
||||
}
|
||||
printf(" //%d\n};\n", (needed-1));
|
||||
#endif
|
||||
|
||||
#if RANGE
|
||||
printf("#define COLLUNSAFE_RANGE 1\n");
|
||||
printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount);
|
||||
printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2);
|
||||
for(int32_t i=0;i<rangeCount;i++) {
|
||||
printf(" 0x%04X, 0x%04X, // %d\n",
|
||||
unsafeBackwardSet->getRangeStart(i),
|
||||
unsafeBackwardSet->getRangeEnd(i),
|
||||
i);
|
||||
}
|
||||
printf("};\n");
|
||||
#endif
|
||||
|
||||
#if SERIALIZE
|
||||
printf("#define COLLUNSAFE_SERIALIZE 1\n");
|
||||
printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount);
|
||||
printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount);
|
||||
for(int32_t i=0;i<serializedCount;i++) {
|
||||
if( (i>0) && (i%8 == 0) ) {
|
||||
printf(" // %d\n", i);
|
||||
}
|
||||
printf("0x%04X", serializedData[i]); // TODO check
|
||||
if(i != (serializedCount-1)) {
|
||||
printf(", ");
|
||||
}
|
||||
}
|
||||
printf("};\n");
|
||||
#endif
|
||||
|
||||
printf("#endif\n");
|
||||
fflush(stderr);
|
||||
fflush(stdout);
|
||||
return(U_SUCCESS(errorCode)?0:1);
|
||||
}
|
69
icu4c/source/tools/gencolusb/verify_uset.cpp
Normal file
69
icu4c/source/tools/gencolusb/verify_uset.cpp
Normal file
|
@ -0,0 +1,69 @@
|
|||
/**
|
||||
* Copyright (c) 1999-2012, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*
|
||||
* Test for source/i18n/collunsafe.h
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "unicode/ucol.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/coll.h"
|
||||
#include "collation.h"
|
||||
|
||||
#include "collunsafe.h"
|
||||
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
puts("verify");
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
#if defined (COLLUNSAFE_PATTERN)
|
||||
puts("verify pattern");
|
||||
const UnicodeString unsafeBackwardPattern(FALSE, collunsafe_pattern, collunsafe_len);
|
||||
fprintf(stderr, "\n -- pat '%c%c%c%c%c'\n",
|
||||
collunsafe_pattern[0],
|
||||
collunsafe_pattern[1],
|
||||
collunsafe_pattern[2],
|
||||
collunsafe_pattern[3],
|
||||
collunsafe_pattern[4]);
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
UnicodeSet us(unsafeBackwardPattern, errorCode);
|
||||
fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode));
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined (COLLUNSAFE_RANGE)
|
||||
{
|
||||
puts("verify range");
|
||||
UnicodeSet u;
|
||||
for(int32_t i=0;i<unsafe_rangeCount*2;i+=2) {
|
||||
u.add(unsafe_ranges[i+0],unsafe_ranges[i+1]);
|
||||
}
|
||||
printf("Finished with %d ranges\n", u.getRangeCount());
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined (COLLUNSAFE_SERIALIZE)
|
||||
{
|
||||
puts("verify serialize");
|
||||
UnicodeSet u(unsafe_serializedData, unsafe_serializedCount, UnicodeSet::kSerialized, errorCode);
|
||||
fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode));
|
||||
printf("Finished deserialize with %d ranges\n", u.getRangeCount());
|
||||
}
|
||||
#endif
|
||||
// if(tailoring.unsafeBackwardSet == NULL) {
|
||||
// errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
// fprintf(stderr, "\n%s:%d: err %s\n", __FILE__, __LINE__, u_errorName(errorCode));
|
||||
// }
|
||||
puts("verify col UCA");
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
Collator *col = Collator::createInstance(Locale::getEnglish(), errorCode);
|
||||
fprintf(stderr, "\n%s:%d: err %s creating collator\n", __FILE__, __LINE__, u_errorName(errorCode));
|
||||
}
|
||||
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue