From a9cedfb775e5c9df5ef69542a8ff68af6d1e697e Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Tue, 20 Jun 2017 22:11:08 +0000 Subject: [PATCH] ICU-12507 bump rbbi data version for change to UTrie2; consolidate version checking. X-SVN-Rev: 40183 --- icu4c/source/common/rbbidata.cpp | 30 +++++++------- icu4c/source/common/rbbidata.h | 5 +++ icu4c/source/common/rbbirb.cpp | 8 ++-- .../src/com/ibm/icu/text/RBBIDataWrapper.java | 18 +++------ .../src/com/ibm/icu/text/RBBIRuleBuilder.java | 40 +++++++++---------- .../ibm/icu/text/RuleBasedBreakIterator.java | 6 +-- icu4j/main/shared/data/icudata.jar | 4 +- icu4j/main/shared/data/icutzdata.jar | 2 +- 8 files changed, 55 insertions(+), 58 deletions(-) diff --git a/icu4c/source/common/rbbidata.cpp b/icu4c/source/common/rbbidata.cpp index 916e2b6d1fd..32f3d32d508 100644 --- a/icu4c/source/common/rbbidata.cpp +++ b/icu4c/source/common/rbbidata.cpp @@ -54,9 +54,8 @@ RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) { dh->info.dataFormat[0] == 0x42 && // dataFormat="Brk " dh->info.dataFormat[1] == 0x72 && dh->info.dataFormat[2] == 0x6b && - dh->info.dataFormat[3] == 0x20) - // Note: info.fFormatVersion is duplicated in the RBBIDataHeader, and is - // validated when checking that. + dh->info.dataFormat[3] == 0x20 && + isDataVersionAcceptable(dh->info.formatVersion)) ) { status = U_INVALID_FORMAT_ERROR; return; @@ -67,6 +66,16 @@ RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) { fUDataMem = udm; } +UBool RBBIDataWrapper::isDataVersionAcceptable(const uint8_t version[]) { + for (int i=0; ifMagic != 0xb1a0 || fHeader->fFormatVersion[0] != 3) - { + if (fHeader->fMagic != 0xb1a0 || !isDataVersionAcceptable(fHeader->fFormatVersion)) { status = U_INVALID_FORMAT_ERROR; return; } @@ -308,7 +316,7 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD pInfo->dataFormat[1]==0x72 && pInfo->dataFormat[2]==0x6b && pInfo->dataFormat[3]==0x20 && - pInfo->formatVersion[0]==3 )) { + RBBIDataWrapper::isDataVersionAcceptable(pInfo->formatVersion) )) { udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], @@ -329,17 +337,11 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD // // Get the RRBI Data Header, and check that it appears to be OK. // - // Note: ICU 3.2 and earlier, RBBIDataHeader::fDataFormat was actually - // an int32_t with a value of 1. Starting with ICU 3.4, - // RBBI's fDataFormat matches the dataFormat field from the - // UDataInfo header, four int8_t bytes. The value is {3,1,0,0} - // const uint8_t *inBytes =(const uint8_t *)inData+headerSize; RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes; if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 || - rbbiDH->fFormatVersion[0] != 3 || - ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) - { + !RBBIDataWrapper::isDataVersionAcceptable(rbbiDH->fFormatVersion) || + ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) { udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n"); *status=U_UNSUPPORTED_ERROR; return 0; diff --git a/icu4c/source/common/rbbidata.h b/icu4c/source/common/rbbidata.h index c64c65cb466..f1608b54285 100644 --- a/icu4c/source/common/rbbidata.h +++ b/icu4c/source/common/rbbidata.h @@ -56,6 +56,9 @@ ubrk_swap(const UDataSwapper *ds, U_NAMESPACE_BEGIN +// The current RBBI data format version. +static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {4, 0, 0, 0}; + /* * The following structs map exactly onto the raw data from ICU common data file. */ @@ -152,6 +155,8 @@ public: RBBIDataWrapper(UDataMemory* udm, UErrorCode &status); ~RBBIDataWrapper(); + static UBool isDataVersionAcceptable(const uint8_t version[]); + void init0(); void init(const RBBIDataHeader *data, UErrorCode &status); RBBIDataWrapper *addReference(); diff --git a/icu4c/source/common/rbbirb.cpp b/icu4c/source/common/rbbirb.cpp index f4fa13c9b17..84f9974204b 100644 --- a/icu4c/source/common/rbbirb.cpp +++ b/icu4c/source/common/rbbirb.cpp @@ -177,10 +177,10 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { data->fMagic = 0xb1a0; - data->fFormatVersion[0] = 3; - data->fFormatVersion[1] = 1; - data->fFormatVersion[2] = 0; - data->fFormatVersion[3] = 0; + data->fFormatVersion[0] = RBBI_DATA_FORMAT_VERSION[0]; + data->fFormatVersion[1] = RBBI_DATA_FORMAT_VERSION[1]; + data->fFormatVersion[2] = RBBI_DATA_FORMAT_VERSION[2]; + data->fFormatVersion[3] = RBBI_DATA_FORMAT_VERSION[3]; data->fLength = totalSize; data->fCatCount = fSetBuilder->getNumCharCategories(); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java index e7607013a31..7e330e60935 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java @@ -38,14 +38,14 @@ final class RBBIDataWrapper { private boolean isBigEndian; - static final int DATA_FORMAT = 0x42726b20; // "Brk " - static final int FORMAT_VERSION = 0x03010000; // 3.1 + static final int DATA_FORMAT = 0x42726b20; // "Brk " + static final int FORMAT_VERSION = 0x04000000; // 4.0.0.0 private static final class IsAcceptable implements Authenticate { - // @Override when we switch to Java 6 @Override public boolean isDataVersionAcceptable(byte version[]) { - return version[0] == (FORMAT_VERSION >>> 24); + int intVersion = (version[0] << 24) + (version[1] << 16) + (version[2] << 8) + version[3]; + return intVersion == FORMAT_VERSION; } } private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable(); @@ -104,7 +104,6 @@ final class RBBIDataWrapper { */ final static class RBBIDataHeader { int fMagic; // == 0xbla0 - int fVersion; // == 1 (for ICU 3.2 and earlier. byte[] fFormatVersion; // For ICU 3.4 and later. int fLength; // Total length in bytes of this RBBI Data, // including all sections, not just the header. @@ -162,10 +161,6 @@ final class RBBIDataWrapper { // Read in the RBBI data header... This.fHeader = new RBBIDataHeader(); This.fHeader.fMagic = bytes.getInt(); - // Read the same 4 bytes as an int and as a byte array: The data format could be - // the old fVersion=1 (TODO: probably not with a real ICU data header?) - // or the new fFormatVersion=3.x. - This.fHeader.fVersion = bytes.getInt(bytes.position()); This.fHeader.fFormatVersion[0] = bytes.get(); This.fHeader.fFormatVersion[1] = bytes.get(); This.fHeader.fFormatVersion[2] = bytes.get(); @@ -189,10 +184,7 @@ final class RBBIDataWrapper { ICUBinary.skipBytes(bytes, 6 * 4); // uint32_t fReserved[6]; - if (This.fHeader.fMagic != 0xb1a0 || - ! (This.fHeader.fVersion == 1 || // ICU 3.2 and earlier - This.fHeader.fFormatVersion[0] == 3) // ICU 3.4 - ) { + if (This.fHeader.fMagic != 0xb1a0 || !IS_ACCEPTABLE.isDataVersionAcceptable(This.fHeader.fFormatVersion)) { throw new IOException("Break Iterator Rule Data Magic Number Incorrect, or unsupported data version."); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java index c56c0fa16fa..961a23c1f8b 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java @@ -25,17 +25,17 @@ class RBBIRuleBuilder { // This is the main class for building (compiling) break rules into the tables // required by the runtime RBBI engine. // - + String fDebugEnv; // controls debug trace output String fRules; // The rule string that we are compiling RBBIRuleScanner fScanner; // The scanner. - + // // There are four separate parse trees generated, one for each of the // forward rules, reverse rules, safe forward rules and safe reverse rules. // This array references the root of each of the trees. - // + // RBBINode[] fTreeRoots = new RBBINode[4]; static final int fForwardTree = 0; // Indexes into the above fTreeRoots array static final int fReverseTree = 1; // for each of the trees. @@ -69,7 +69,7 @@ class RBBIRuleBuilder { // Map Value is the runtime array index. List fRuleStatusVals; // List of Integer objects. Has same layout as the - // runtime array of status (tag) values - + // runtime array of status (tag) values - // number of values in group 1 // first status value in group 1 // 2nd status value in group 1 @@ -84,50 +84,50 @@ class RBBIRuleBuilder { // static final int U_BRK_ERROR_START = 0x10200; /**< Start of codes indicating Break Iterator failures */ - + static final int U_BRK_INTERNAL_ERROR = 0x10201; /**< An internal error (bug) was detected. */ - + static final int U_BRK_HEX_DIGITS_EXPECTED = 0x10202; /**< Hex digits expected as part of a escaped char in a rule. */ - + static final int U_BRK_SEMICOLON_EXPECTED = 0x10203; /**< Missing ';' at the end of a RBBI rule. */ - + static final int U_BRK_RULE_SYNTAX = 0x10204; /**< Syntax error in RBBI rule. */ - + static final int U_BRK_UNCLOSED_SET = 0x10205; /**< UnicodeSet witing an RBBI rule missing a closing ']'. */ - + static final int U_BRK_ASSIGN_ERROR = 0x10206; /**< Syntax error in RBBI rule assignment statement. */ - + static final int U_BRK_VARIABLE_REDFINITION = 0x10207; /**< RBBI rule $Variable redefined. */ - + static final int U_BRK_MISMATCHED_PAREN = 0x10208; /**< Mis-matched parentheses in an RBBI rule. */ - + static final int U_BRK_NEW_LINE_IN_QUOTED_STRING = 0x10209; /**< Missing closing quote in an RBBI rule. */ - + static final int U_BRK_UNDEFINED_VARIABLE = 0x1020a; /**< Use of an undefined $Variable in an RBBI rule. */ - + static final int U_BRK_INIT_ERROR = 0x1020b; /**< Initialization failure. Probable missing ICU Data. */ - + static final int U_BRK_RULE_EMPTY_SET = 0x1020c; /**< Rule contains an empty Unicode Set. */ - + static final int U_BRK_UNRECOGNIZED_OPTION = 0x1020d; /**< !!option in RBBI rules not recognized. */ - + static final int U_BRK_MALFORMED_RULE_TAG = 0x1020e; /**< The {nnn} tag on a rule is mal formed */ static final int U_BRK_MALFORMED_SET = 0x1020f; - + static final int U_BRK_ERROR_LIMIT = 0x10210; /**< This must always be the last value to indicate the limit for Break Iterator failures */ @@ -196,7 +196,7 @@ class RBBIRuleBuilder { // int[] header = new int[RBBIDataWrapper.DH_SIZE]; // sizeof struct RBBIDataHeader header[RBBIDataWrapper.DH_MAGIC] = 0xb1a0; - header[RBBIDataWrapper.DH_FORMATVERSION] = 0x03010000; // uint8_t fFormatVersion[4]; + header[RBBIDataWrapper.DH_FORMATVERSION] = RBBIDataWrapper.FORMAT_VERSION; header[RBBIDataWrapper.DH_LENGTH] = totalSize; // fLength, the total size of all rule sections. header[RBBIDataWrapper.DH_CATCOUNT] = fSetBuilder.getNumCharCategories(); // fCatCount. header[RBBIDataWrapper.DH_FTABLE] = headerSize; // fFTable diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java index 31ed425a735..7b8dce8b29b 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java @@ -1483,10 +1483,8 @@ public class RuleBasedBreakIterator extends BreakIterator { mainLoop: for (;;) { if (c == DONE32) { // Reached end of input string. - if (mode == RBBI_END || fRData.fHeader.fVersion == 1) { - // Either this is the old (ICU 3.2 and earlier) format data which - // does not support explicit support for matching {eof}, or - // we have already done the {eof} iteration. Now is the time + if (mode == RBBI_END) { + // We have already done the {eof} iteration. Now is the time // to unconditionally bail out. if (result == initialPosition) { // Ran off start, no match found. diff --git a/icu4j/main/shared/data/icudata.jar b/icu4j/main/shared/data/icudata.jar index 74c58c1646d..3640d15f93b 100755 --- a/icu4j/main/shared/data/icudata.jar +++ b/icu4j/main/shared/data/icudata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d315546f344483688e78322304130697164e0d0363b20ed00880598630632341 -size 12128031 +oid sha256:29b73bb7468ec529b2ad200e0e5e14a16b53d12cc8ba5ac29b9da9de8968adc0 +size 12128017 diff --git a/icu4j/main/shared/data/icutzdata.jar b/icu4j/main/shared/data/icutzdata.jar index 6c1262dcff2..e56cb375b34 100755 --- a/icu4j/main/shared/data/icutzdata.jar +++ b/icu4j/main/shared/data/icutzdata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:17fb194e1234c73ab09442acf76f1b872d77d8aa7494a06f5964f1342616d69e +oid sha256:644e4eaa7dfdeb72c639d20160274994b0709da05f2b009a306bbc68f440bb87 size 92448