ICU-11886 unsafe-backward set, review comments

. remove `#if 0` blocks and stray comments
. clean up UnicodeSet docs (the deserialize constructor does not freeze the set)
. remove dead code in collationdatareader.cpp

X-SVN-Rev: 37960
This commit is contained in:
Steven R. Loomis 2015-09-15 21:42:56 +00:00
parent 99febdc465
commit 39f7040439
5 changed files with 10 additions and 50 deletions

View file

@ -379,7 +379,6 @@ public:
/**
* Constructs a set from the output of serialize().
* The resulting set will be frozen.
*
* @param buffer the 16 bit array
* @param bufferLen the original length returned from serialize()
@ -391,7 +390,7 @@ public:
UnicodeSet(const uint16_t buffer[], int32_t bufferLen,
ESerialization serialization, UErrorCode &status);
#endif
/**
* Constructs a set from the given pattern. See the class
* description for the syntax of the pattern language.

View file

@ -1474,7 +1474,6 @@ UnicodeSet& UnicodeSet::compact() {
/**
* Deserialize constructor.
* Result will be frozen.
*/
UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization serialization, UErrorCode &ec)
: len(1), capacity(1+START_EXTRA), list(0), bmpSet(0), buffer(0),
@ -1499,7 +1498,7 @@ UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization se
setToBogus();
return;
}
// bmp?
int32_t headerSize = ((data[0]&0x8000)) ?2:1;
int32_t bmpLength = (headerSize==1)?data[0]:data[1];
@ -1522,15 +1521,6 @@ UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization se
printf("<<16@%d[%d] %X\n", i+headerSize, i, list[i]);
#endif
}
/*
if(bmpLength>0) {
bmpSet= new BMPSet(list, bmpLength);
if(bmpSet == NULL) {
ec = U_MEMORY_ALLOCATION_ERROR;
setToBogus();
return;
}
}*/
// copy smp
for(i=bmpLength;i<len;i++) {
list[i] = ((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+0] << 16) +

View file

@ -25,18 +25,12 @@
#include "collationrootelements.h"
#include "collationsettings.h"
#include "collationtailoring.h"
#include "collunsafe.h"
#include "normalizer2impl.h"
#include "uassert.h"
#include "ucmndata.h"
#include "utrie2.h"
// #if U_HAVE_COLLUNSAFE
#if 1
#include "collunsafe.h"
#endif
// #end
U_NAMESPACE_BEGIN
namespace {
@ -269,16 +263,7 @@ CollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes
return;
}
if(baseData == NULL) {
#if defined( COLLUNSAFE_COLL_VERSION )
#if defined(COLLUNSAFE_RANGES) /* slower but still an improvement*/
tailoring.unsafeBackwardSet = new UnicodeSet();
for(int32_t i=0;i<unsafe_rangeCount*2;i+=2) {
tailoring.unsafeBackwardSet->add(unsafe_ranges[i+0],unsafe_ranges[i+1]);
}
tailoring.unsafeBackwardSet->freeze();
#elif defined (COLLUNSAFE_SERIALIZE)
/* faster */
#if defined(COLLUNSAFE_COLL_VERSION) && defined (COLLUNSAFE_SERIALIZE)
tailoring.unsafeBackwardSet = new UnicodeSet(unsafe_serializedData, unsafe_serializedCount, UnicodeSet::kSerialized, errorCode);
if(tailoring.unsafeBackwardSet == NULL) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
@ -286,10 +271,6 @@ CollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes
} else if (U_FAILURE(errorCode)) {
return;
}
#else
#error no unsafe-backwards strategy chosen
#endif
#else
// Create the unsafe-backward set for the root collator.
// Include all non-zero combining marks and trail surrogates.
@ -308,7 +289,7 @@ CollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes
return;
}
data->nfcImpl.addLcccChars(*tailoring.unsafeBackwardSet);
#endif
#endif // !COLLUNSAFE_SERIALIZE || !COLLUNSAFE_COLL_VERSION
} else {
// Clone the root collator's set contents.
tailoring.unsafeBackwardSet = static_cast<UnicodeSet *>(

View file

@ -1966,17 +1966,6 @@ void UnicodeSetTest::checkSerializeRoundTrip(const UnicodeSet& t, UErrorCode &st
errln("checkSerializeRoundTrip: error %s serializing buffer\n", u_errorName(status));
return;
}
#if 0
UnicodeString pat; t.toPattern(pat, FALSE);
infoln(pat);
printf(" %d: ", len);
for(int i=0;i<len;i++) {
printf( " %04X ", serializeBuffer.getAlias()[i]);
}
printf("\n");
#endif
UnicodeSet deserialized(serializeBuffer.getAlias(), len, UnicodeSet::kSerialized, status);
if(U_FAILURE(status)) {
errln("checkSerializeRoundTrip: error %s deserializing buffer: buf %p len %d, original %d\n", u_errorName(status), serializeBuffer.getAlias(), len, t.getRangeCount());

View file

@ -15,9 +15,9 @@
/**
* Define the type of generator to use. Choose one.
*/
#define SERIALIZE 1 //< Default: use UnicodeSet.serialize() (best, fast, requires new UnicodeSet c'tor)
#define RANGES 0 //< Enumerate ranges (works, not as fast)
#define PATTERN 0 //< Generate a UnicodeSet pattern (broken AND probably slow)
#define SERIALIZE 1 //< Default: emit UnicodeSet.serialize() output, to be loaded through the new internal (deserializing) UnicodeSet c'tor
#define RANGES 0 //< Enumerate ranges (works, but not as fast). No support in collationdatareader.cpp. NOTE(review): the emitting code below is guarded by `#if RANGE` (singular) -- confirm which spelling is intended
#define PATTERN 0 //< Generate a UnicodeSet pattern (depends on #11891 and is probably slower). No support in collationdatareader.cpp
int main(int argc, const char *argv[]) {
UErrorCode errorCode = U_ZERO_ERROR;
@ -57,7 +57,7 @@ int main(int argc, const char *argv[]) {
#endif
#if PATTERN
fprintf(stderr,".. pattern\n");
fprintf(stderr,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n");
// attempt to use pattern
UnicodeString pattern;
@ -128,6 +128,7 @@ int main(int argc, const char *argv[]) {
#endif
#if RANGE
fprintf(stderr, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n");
printf("#define COLLUNSAFE_RANGE 1\n");
printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount);
printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2);