ICU-13197 fix indexesLength check while loading data; make duplicate elimination of noNo mappings more readable

X-SVN-Rev: 40157
This commit is contained in:
Markus Scherer 2017-06-07 18:22:44 +00:00
parent 6c1e41e0f2
commit e05c15a02c
5 changed files with 19 additions and 11 deletions

View file

@ -84,7 +84,7 @@ LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCod
const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
const int32_t *inIndexes=(const int32_t *)inBytes;
int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
if(indexesLength<=IX_MIN_MAYBE_YES) {
if(indexesLength<=IX_MIN_YES_NO_MAPPINGS_ONLY) {
errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
return;
}

View file

@ -5,7 +5,7 @@
* Copyright (C) 2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* loadednormalizer2impl.h
* norm2allmodes.h
*
* created on: 2014sep07
* created by: Markus W. Scherer

View file

@ -2645,12 +2645,13 @@ unorm2_swap(const UDataSwapper *ds,
/* check data format and format version */
pInfo=(const UDataInfo *)((const char *)inData+4);
uint8_t formatVersion0=pInfo->formatVersion[0];
if(!(
pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */
pInfo->dataFormat[1]==0x72 &&
pInfo->dataFormat[2]==0x6d &&
pInfo->dataFormat[3]==0x32 &&
(pInfo->formatVersion[0]==1 || pInfo->formatVersion[0]==2)
(formatVersion0==1 || formatVersion0==2)
)) {
udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n",
pInfo->dataFormat[0], pInfo->dataFormat[1],
@ -2664,10 +2665,16 @@ unorm2_swap(const UDataSwapper *ds,
outBytes=(uint8_t *)outData+headerSize;
inIndexes=(const int32_t *)inBytes;
int32_t minIndexesLength;
if(formatVersion0==1) {
minIndexesLength=Normalizer2Impl::IX_MIN_MAYBE_YES+1;
} else {
minIndexesLength=Normalizer2Impl::IX_MIN_YES_NO_MAPPINGS_ONLY+1;
}
if(length>=0) {
length-=headerSize;
if(length<(int32_t)sizeof(indexes)) {
if(length<minIndexesLength*4) {
udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for Normalizer2 data\n",
length);
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;

View file

@ -90,16 +90,17 @@ int32_t ExtraData::writeMapping(UChar32 c, const Norm &norm, UnicodeString &data
int32_t ExtraData::writeNoNoMapping(UChar32 c, const Norm &norm,
UnicodeString &dataString,
Hashtable &previousMappings) {
int32_t oldLength=dataString.length();
int32_t offset=oldLength+writeMapping(c, norm, dataString);
UnicodeString newMapping=dataString.tempSubString(oldLength);
UnicodeString newMapping;
int32_t offset=writeMapping(c, norm, newMapping);
int32_t previousOffset=previousMappings.geti(newMapping);
if(previousOffset!=0) {
// Duplicate, remove the new units and point to the old ones.
dataString.truncate(oldLength);
// Duplicate, point to the identical mapping that has already been stored.
offset=previousOffset-1;
} else {
// Enter this new mapping into the hashtable, avoiding value 0 which is "not found".
// Append this new mapping and
// enter it into the hashtable, avoiding value 0 which is "not found".
offset=dataString.length()+offset;
dataString.append(newMapping);
IcuToolErrorCode errorCode("gennorm2/writeExtraData()/Hashtable.puti()");
previousMappings.puti(newMapping, offset+1, errorCode);
}

View file

@ -428,7 +428,7 @@ public final class Normalizer2Impl {
try {
dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
int indexesLength=bytes.getInt()/4; // inIndexes[IX_NORM_TRIE_OFFSET]/4
if(indexesLength<=IX_MIN_MAYBE_YES) {
if(indexesLength<=IX_MIN_YES_NO_MAPPINGS_ONLY) {
throw new ICUUncheckedIOException("Normalizer2 data: not enough indexes");
}
int[] inIndexes=new int[indexesLength];