mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-11 08:01:32 +00:00
ICU-4157 Add compatibility with earlier compiled break rules, in addition to the new format for Unicode 4.1
X-SVN-Rev: 17405
This commit is contained in:
parent
f4130d74f3
commit
a992612ff4
5 changed files with 72 additions and 25 deletions
|
@ -1026,10 +1026,21 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
|
|||
// loop until we reach the beginning of the text or transition to state 0
|
||||
for (;;) {
|
||||
if (hasPassedStartText) {
|
||||
// end of input is hardwired by rule builder as category #1.
|
||||
// Ran off the beginning of text.
|
||||
if (*(int32_t *)fData->fHeader->fFormatVersion == 1) {
|
||||
// This is the old (ICU 3.2 and earlier) format data.
|
||||
// No explicit support for matching {eof}. Did have hack, though...
|
||||
if (row->fLookAhead != 0 && lookaheadResult == 0) {
|
||||
result = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
// Newer data format, with support for {eof}.
|
||||
// end of input is hardwired by rule builder as category/column 1.
|
||||
category = 1;
|
||||
} else {
|
||||
// look up the current character's category
|
||||
// Not at {eof}.
|
||||
// look up the current character's category (the table column)
|
||||
UTRIE_GET16(&fData->fTrie, c, category);
|
||||
}
|
||||
|
||||
|
|
|
@ -69,8 +69,11 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
|
|||
return;
|
||||
}
|
||||
fHeader = data;
|
||||
if (fHeader->fMagic != 0xb1a0) {
|
||||
status = U_BRK_INTERNAL_ERROR;
|
||||
if (fHeader->fMagic != 0xb1a0 ||
|
||||
!(fHeader->fFormatVersion[0] == 3 || // ICU 3.4
|
||||
*(int32_t *)fHeader->fFormatVersion == 1)) // ICU 3.2 and earlier.
|
||||
{
|
||||
status = U_INVALID_FORMAT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -234,7 +237,8 @@ void RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *tab
|
|||
#ifdef RBBI_DEBUG
|
||||
void RBBIDataWrapper::printData() {
|
||||
RBBIDebugPrintf("RBBI Data at %p\n", (void *)fHeader);
|
||||
RBBIDebugPrintf(" Version = %d\n", fHeader->fVersion);
|
||||
RBBIDebugPrintf(" Version = {%d %d %d %d}\n", fHeader->fFormatVersion[0], fHeader->fFormatVersion[1],
|
||||
fHeader->fFormatVersion[2], fHeader->fFormatVersion[3]);
|
||||
RBBIDebugPrintf(" total length of data = %d\n", fHeader->fLength);
|
||||
RBBIDebugPrintf(" number of character categories = %d\n\n", fHeader->fCatCount);
|
||||
|
||||
|
@ -302,10 +306,16 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
|
|||
//
|
||||
// Get the RBBI Data Header, and check that it appears to be OK.
|
||||
//
|
||||
// Note: In ICU 3.2 and earlier, RBBIDataHeader::fFormatVersion was actually
|
||||
// an int32_t with a value of 1. Starting with ICU 3.4,
|
||||
// RBBI's fFormatVersion matches the formatVersion field from the
|
||||
// UDataInfo header, four int8_t bytes. The value is {3,1,0,0}
|
||||
//
|
||||
const uint8_t *inBytes =(const uint8_t *)inData+headerSize;
|
||||
RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes;
|
||||
UBool formatVersionOne = ds->readUInt32(*(int32_t *)rbbiDH->fFormatVersion) == 1;
|
||||
if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 ||
|
||||
ds->readUInt32(rbbiDH->fVersion) != 1 ||
|
||||
!(formatVersionOne || rbbiDH->fFormatVersion[0] == 3) ||
|
||||
ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader))
|
||||
{
|
||||
udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n");
|
||||
|
@ -340,7 +350,9 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
|
|||
// we need to reference the header to locate the data, and an
|
||||
// inplace swap of the header leaves it unusable.
|
||||
//
|
||||
uint8_t *outBytes = (uint8_t *)outData + headerSize;
|
||||
uint8_t *outBytes = (uint8_t *)outData + headerSize;
|
||||
RBBIDataHeader *outputDH = (RBBIDataHeader *)outBytes;
|
||||
|
||||
int32_t tableStartOffset;
|
||||
int32_t tableLength;
|
||||
|
||||
|
@ -416,8 +428,16 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
|
|||
ds->swapArray32(ds, inBytes+ds->readUInt32(rbbiDH->fStatusTable), ds->readUInt32(rbbiDH->fStatusTableLen),
|
||||
outBytes+ds->readUInt32(rbbiDH->fStatusTable), status);
|
||||
|
||||
// And, last, the header. All 32 bit values.
|
||||
ds->swapArray32(ds, inBytes, sizeof(RBBIDataHeader), outBytes, status);
|
||||
// And, last, the header.
|
||||
// For the old version one format, the entire header consists of int32_t values.
|
||||
// For the newer formats, the fFormatVersion field is an array of four bytes.
|
||||
// Swap the whole thing as int32_t, then, for the newer format, re-swap the one field.
|
||||
//
|
||||
ds->swapArray32(ds, inBytes, sizeof(RBBIDataHeader), outBytes, status);
|
||||
if (formatVersionOne == FALSE) {
|
||||
ds->swapArray32(ds, outputDH->fFormatVersion, 4, outputDH->fFormatVersion, status);
|
||||
}
|
||||
|
||||
|
||||
return totalSize;
|
||||
}
|
||||
|
|
|
@ -57,17 +57,22 @@ U_NAMESPACE_BEGIN
|
|||
* The following structs map exactly onto the raw data from ICU common data file.
|
||||
*/
|
||||
struct RBBIDataHeader {
|
||||
uint32_t fMagic; /* == 0xb1a0 */
|
||||
uint32_t fVersion; /* == 1 */
|
||||
uint32_t fLength; /* Total length in bytes of this RBBI Data, */
|
||||
/* including all sections, not just the header. */
|
||||
uint32_t fCatCount; /* Number of character categories. */
|
||||
uint32_t fMagic; /* == 0xb1a0 */
|
||||
uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */
|
||||
/* if there is one associated with this data. */
|
||||
/* (version originates in rbbi, is copied to UDataInfo) */
|
||||
/* For ICU 3.2 and earlier, this field was */
|
||||
/* uint32_t fVersion */
|
||||
/* with a value of 1. */
|
||||
uint32_t fLength; /* Total length in bytes of this RBBI Data, */
|
||||
/* including all sections, not just the header. */
|
||||
uint32_t fCatCount; /* Number of character categories. */
|
||||
|
||||
/* */
|
||||
/* Offsets and sizes of each of the subsections within the RBBI data. */
|
||||
/* All offsets are bytes from the start of the RBBIDataHeader. */
|
||||
/* All sizes are in bytes. */
|
||||
/* */
|
||||
/* */
|
||||
/* Offsets and sizes of each of the subsections within the RBBI data. */
|
||||
/* All offsets are bytes from the start of the RBBIDataHeader. */
|
||||
/* All sizes are in bytes. */
|
||||
/* */
|
||||
uint32_t fFTable; /* forward state transition table. */
|
||||
uint32_t fFTableLen;
|
||||
uint32_t fRTable; /* Offset to the reverse state transition table. */
|
||||
|
|
|
@ -171,10 +171,13 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
|
|||
uprv_memset(data, 0, totalSize);
|
||||
|
||||
|
||||
data->fMagic = 0xb1a0;
|
||||
data->fVersion = 1;
|
||||
data->fLength = totalSize;
|
||||
data->fCatCount = fSetBuilder->getNumCharCategories();
|
||||
data->fMagic = 0xb1a0;
|
||||
data->fFormatVersion[0] = 3;
|
||||
data->fFormatVersion[1] = 1;
|
||||
data->fFormatVersion[2] = 0;
|
||||
data->fFormatVersion[3] = 0;
|
||||
data->fLength = totalSize;
|
||||
data->fCatCount = fSetBuilder->getNumCharCategories();
|
||||
|
||||
data->fFTable = headerSize;
|
||||
data->fFTableLen = forwardTableSize;
|
||||
|
|
|
@ -37,6 +37,8 @@
|
|||
#include "uoptions.h"
|
||||
#include "unewdata.h"
|
||||
#include "ucmndata.h"
|
||||
#include "rbbidata.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -108,8 +110,10 @@ DataHeader dh ={
|
|||
0, // reserved
|
||||
|
||||
{ 0x42, 0x72, 0x6b, 0x20 }, // dataFormat="Brk "
|
||||
{ 3, 0, 0, 0 }, // formatVersion
|
||||
{ 4, 0, 0, 0 } // dataVersion (Unicode version)
|
||||
{ 0xff, 0, 0, 0 }, // formatVersion. Filled in later with values
|
||||
// from the RBBI rule builder. The values declared
|
||||
// here should never appear in any real RBBI data.
|
||||
{ 4, 1, 0, 0 } // dataVersion (Unicode version)
|
||||
}};
|
||||
|
||||
#endif
|
||||
|
@ -318,6 +322,8 @@ int main(int argc, char **argv) {
|
|||
const uint8_t *outData;
|
||||
outData = bi->getBinaryRules(outDataSize);
|
||||
|
||||
// Copy the data format version numbers from the RBBI data header into the UDataMemory header.
|
||||
uprv_memcpy(dh.info.formatVersion, ((RBBIDataHeader *)outData)->fFormatVersion, sizeof(dh.info.formatVersion));
|
||||
|
||||
//
|
||||
// Create the output file
|
||||
|
@ -330,6 +336,8 @@ int main(int argc, char **argv) {
|
|||
outFileName, u_errorName(status));
|
||||
exit(status);
|
||||
}
|
||||
|
||||
|
||||
// Write the data itself.
|
||||
udata_writeBlock(pData, outData, outDataSize);
|
||||
// finish up
|
||||
|
|
Loading…
Add table
Reference in a new issue