ICU-4157 Add compatibility with earlier compiled break rules, in addition to the new format for Unicode 4.1

X-SVN-Rev: 17405
This commit is contained in:
Andy Heninger 2005-03-28 05:21:50 +00:00
parent f4130d74f3
commit a992612ff4
5 changed files with 72 additions and 25 deletions

View file

@ -1026,10 +1026,21 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
// loop until we reach the beginning of the text or transition to state 0
for (;;) {
if (hasPassedStartText) {
// end of input is hardwired by rule builder as category #1.
// Ran off the beginning of text.
if (*(int32_t *)fData->fHeader->fFormatVersion == 1) {
// This is the old (ICU 3.2 and earlier) format data.
// No explicit support for matching {eof}. Did have hack, though...
if (row->fLookAhead != 0 && lookaheadResult == 0) {
result = 0;
}
break;
}
// Newer data format, with support for {eof}.
// end of input is hardwired by rule builder as category/column 1.
category = 1;
} else {
// look up the current character's category
// Not at {eof}.
// look up the current character's category (the table column)
UTRIE_GET16(&fData->fTrie, c, category);
}

View file

@ -69,8 +69,11 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
return;
}
fHeader = data;
if (fHeader->fMagic != 0xb1a0) {
status = U_BRK_INTERNAL_ERROR;
if (fHeader->fMagic != 0xb1a0 ||
!(fHeader->fFormatVersion[0] == 3 || // ICU 3.4
*(int32_t *)fHeader->fFormatVersion == 1)) // ICU 3.2 and earlier.
{
status = U_INVALID_FORMAT_ERROR;
return;
}
@ -234,7 +237,8 @@ void RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *tab
#ifdef RBBI_DEBUG
void RBBIDataWrapper::printData() {
RBBIDebugPrintf("RBBI Data at %p\n", (void *)fHeader);
RBBIDebugPrintf(" Version = %d\n", fHeader->fVersion);
RBBIDebugPrintf(" Version = {%d %d %d %d}\n", fHeader->fFormatVersion[0], fHeader->fFormatVersion[1],
fHeader->fFormatVersion[2], fHeader->fFormatVersion[3]);
RBBIDebugPrintf(" total length of data = %d\n", fHeader->fLength);
RBBIDebugPrintf(" number of character categories = %d\n\n", fHeader->fCatCount);
@ -302,10 +306,16 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
//
// Get the RBBI Data Header, and check that it appears to be OK.
//
// Note: In ICU 3.2 and earlier, the field that is now RBBIDataHeader::fFormatVersion
// was a single 32-bit integer (fVersion) with a value of 1. Starting with ICU 3.4,
// RBBI's fFormatVersion matches the formatVersion field from the
// UDataInfo header, four uint8_t bytes. The value is {3,1,0,0}
//
const uint8_t *inBytes =(const uint8_t *)inData+headerSize;
RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes;
UBool formatVersionOne = ds->readUInt32(*(int32_t *)rbbiDH->fFormatVersion) == 1;
if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 ||
ds->readUInt32(rbbiDH->fVersion) != 1 ||
!(formatVersionOne || rbbiDH->fFormatVersion[0] == 3) ||
ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader))
{
udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n");
@ -340,7 +350,9 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
// we need to reference the header to locate the data, and an
// inplace swap of the header leaves it unusable.
//
uint8_t *outBytes = (uint8_t *)outData + headerSize;
uint8_t *outBytes = (uint8_t *)outData + headerSize;
RBBIDataHeader *outputDH = (RBBIDataHeader *)outBytes;
int32_t tableStartOffset;
int32_t tableLength;
@ -416,8 +428,16 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
ds->swapArray32(ds, inBytes+ds->readUInt32(rbbiDH->fStatusTable), ds->readUInt32(rbbiDH->fStatusTableLen),
outBytes+ds->readUInt32(rbbiDH->fStatusTable), status);
// And, last, the header. All 32 bit values.
ds->swapArray32(ds, inBytes, sizeof(RBBIDataHeader), outBytes, status);
// And, last, the header.
// For the old version one format, the entire header consists of int32_t values.
// For the newer formats, the fFormatVersion field is an array of four bytes.
// Swap the whole thing as int32_t, then, for the newer format, re-swap the one field.
//
ds->swapArray32(ds, inBytes, sizeof(RBBIDataHeader), outBytes, status);
if (formatVersionOne == FALSE) {
ds->swapArray32(ds, outputDH->fFormatVersion, 4, outputDH->fFormatVersion, status);
}
return totalSize;
}

View file

@ -57,17 +57,22 @@ U_NAMESPACE_BEGIN
* The following structs map exactly onto the raw data from ICU common data file.
*/
struct RBBIDataHeader {
uint32_t fMagic; /* == 0xbla0 */
uint32_t fVersion; /* == 1 */
uint32_t fLength; /* Total length in bytes of this RBBI Data, */
/* including all sections, not just the header. */
uint32_t fCatCount; /* Number of character categories. */
uint32_t fMagic; /* == 0xb1a0 */
uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */
/* if there is one associated with this data. */
/* (version originates in rbbi, is copied to UDataInfo) */
/* For ICU 3.2 and earlier, this field was */
/* uint32_t fVersion */
/* with a value of 1. */
uint32_t fLength; /* Total length in bytes of this RBBI Data, */
/* including all sections, not just the header. */
uint32_t fCatCount; /* Number of character categories. */
/* */
/* Offsets and sizes of each of the subsections within the RBBI data. */
/* All offsets are bytes from the start of the RBBIDataHeader. */
/* All sizes are in bytes. */
/* */
/* */
/* Offsets and sizes of each of the subsections within the RBBI data. */
/* All offsets are bytes from the start of the RBBIDataHeader. */
/* All sizes are in bytes. */
/* */
uint32_t fFTable; /* forward state transition table. */
uint32_t fFTableLen;
uint32_t fRTable; /* Offset to the reverse state transition table. */

View file

@ -171,10 +171,13 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
uprv_memset(data, 0, totalSize);
data->fMagic = 0xb1a0;
data->fVersion = 1;
data->fLength = totalSize;
data->fCatCount = fSetBuilder->getNumCharCategories();
data->fMagic = 0xb1a0;
data->fFormatVersion[0] = 3;
data->fFormatVersion[1] = 1;
data->fFormatVersion[2] = 0;
data->fFormatVersion[3] = 0;
data->fLength = totalSize;
data->fCatCount = fSetBuilder->getNumCharCategories();
data->fFTable = headerSize;
data->fFTableLen = forwardTableSize;

View file

@ -37,6 +37,8 @@
#include "uoptions.h"
#include "unewdata.h"
#include "ucmndata.h"
#include "rbbidata.h"
#include "cmemory.h"
#include <stdio.h>
#include <stdlib.h>
@ -108,8 +110,10 @@ DataHeader dh ={
0, // reserved
{ 0x42, 0x72, 0x6b, 0x20 }, // dataFormat="Brk "
{ 3, 0, 0, 0 }, // formatVersion
{ 4, 0, 0, 0 } // dataVersion (Unicode version)
{ 0xff, 0, 0, 0 }, // formatVersion. Filled in later with values
// from the RBBI rule builder. The values declared
// here should never appear in any real RBBI data.
{ 4, 1, 0, 0 } // dataVersion (Unicode version)
}};
#endif
@ -318,6 +322,8 @@ int main(int argc, char **argv) {
const uint8_t *outData;
outData = bi->getBinaryRules(outDataSize);
// Copy the data format version numbers from the RBBI data header into the UDataMemory header.
uprv_memcpy(dh.info.formatVersion, ((RBBIDataHeader *)outData)->fFormatVersion, sizeof(dh.info.formatVersion));
//
// Create the output file
@ -330,6 +336,8 @@ int main(int argc, char **argv) {
outFileName, u_errorName(status));
exit(status);
}
// Write the data itself.
udata_writeBlock(pData, outData, outDataSize);
// finish up