From 09ad3d8e4b76ba7461db8bd7e2b0a883f553c3e3 Mon Sep 17 00:00:00 2001 From: Shane Carr Date: Tue, 27 Sep 2016 17:46:10 +0000 Subject: [PATCH] ICU-12646 Error code for long confusable entries. Documenting spoof data binary file format changes. X-SVN-Rev: 39360 --- icu4c/source/i18n/uspoof_conf.cpp | 7 +++++++ icu4c/source/i18n/uspoof_impl.h | 12 +++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/icu4c/source/i18n/uspoof_conf.cpp b/icu4c/source/i18n/uspoof_conf.cpp index 9584e9484e5..587dc4e01c3 100644 --- a/icu4c/source/i18n/uspoof_conf.cpp +++ b/icu4c/source/i18n/uspoof_conf.cpp @@ -344,6 +344,13 @@ void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesL SPUString *targetMapping = static_cast<SPUString *>(uhash_iget(fTable, keyChar)); U_ASSERT(targetMapping != NULL); + // Set an error code if trying to consume a long string. Otherwise, + // codePointAndLengthToKey will abort on a U_ASSERT. + if (targetMapping->fStr->length() > 256) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + int32_t key = ConfusableDataUtils::codePointAndLengthToKey(keyChar, targetMapping->fStr->length()); int32_t value = targetMapping->fCharOrStrTableIndex; diff --git a/icu4c/source/i18n/uspoof_impl.h b/icu4c/source/i18n/uspoof_impl.h index 82b8b5d5704..530767038f3 100644 --- a/icu4c/source/i18n/uspoof_impl.h +++ b/icu4c/source/i18n/uspoof_impl.h @@ -132,7 +132,7 @@ public: // -// Confusable Mappings Data Structures +// Confusable Mappings Data Structures, version 2.0 // // For the confusable data, we are essentially implementing a map, // key: a code point @@ -156,6 +156,16 @@ public: // // There is no nul character or other mark between adjacent strings. // +//---------------------------------------------------------------------------- +// +// Changes from format version 1 to format version 2: +// 1) Removal of the whole-script confusable data tables. +// 2) Removal of the SL/SA/ML/MA and multi-table flags in the key bitmask. 
+// 3) Expansion of string length value in the key bitmask from 2 bits to 8 bits. +// 4) Removal of the string lengths table since 8 bits is sufficient for the +// lengths of all entries in confusables.txt. + + // Internal functions for manipulating confusable data table keys #define USPOOF_CONFUSABLE_DATA_FORMAT_VERSION 2 // version for ICU 58