ICU-12646 Error code for long confusable entries. Documenting spoof data binary file format changes.

X-SVN-Rev: 39360
This commit is contained in:
Shane Carr 2016-09-27 17:46:10 +00:00
parent 6e6eb51366
commit 09ad3d8e4b
2 changed files with 18 additions and 1 deletions

View file

@ -344,6 +344,13 @@ void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesL
SPUString *targetMapping = static_cast<SPUString *>(uhash_iget(fTable, keyChar));
U_ASSERT(targetMapping != NULL);
// Set an error code if the target mapping string is longer than 256.
// Otherwise, codePointAndLengthToKey will abort on a U_ASSERT.
if (targetMapping->fStr->length() > 256) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
int32_t key = ConfusableDataUtils::codePointAndLengthToKey(keyChar,
targetMapping->fStr->length());
int32_t value = targetMapping->fCharOrStrTableIndex;

View file

@ -132,7 +132,7 @@ public:
//
// Confusable Mappings Data Structures
// Confusable Mappings Data Structures, version 2.0
//
// For the confusable data, we are essentially implementing a map,
// key: a code point
@ -156,6 +156,16 @@ public:
//
// There is no nul character or other mark between adjacent strings.
//
//----------------------------------------------------------------------------
//
// Changes from format version 1 to format version 2:
// 1) Removal of the whole-script confusable data tables.
// 2) Removal of the SL/SA/ML/MA and multi-table flags in the key bitmask.
// 3) Expansion of string length value in the key bitmask from 2 bits to 8 bits.
// 4) Removal of the string lengths table since 8 bits is sufficient for the
// lengths of all entries in confusables.txt.
// Internal functions for manipulating confusable data table keys
#define USPOOF_CONFUSABLE_DATA_FORMAT_VERSION 2 // version for ICU 58