mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 23:10:40 +00:00
ICU-10810 genrb: preflight strings on final parse tree, not while building the tree
X-SVN-Rev: 35544
This commit is contained in:
parent
933fed99ed
commit
9e4365c9e6
3 changed files with 160 additions and 115 deletions
|
@ -924,8 +924,11 @@ addCollation(ParseState* state, struct SResource *result, const char *collation
|
|||
res_close(result);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (uprv_strcmp(subtag, "Version") == 0)
|
||||
if (result == NULL)
|
||||
{
|
||||
// Ignore the parsed resources, continue parsing.
|
||||
}
|
||||
else if (uprv_strcmp(subtag, "Version") == 0)
|
||||
{
|
||||
char ver[40];
|
||||
int32_t length = member->u.fString.fLength;
|
||||
|
@ -939,13 +942,7 @@ addCollation(ParseState* state, struct SResource *result, const char *collation
|
|||
u_versionFromString(version, ver);
|
||||
|
||||
table_add(result, member, line, status);
|
||||
|
||||
}
|
||||
else if (uprv_strcmp(subtag, "Override") == 0)
|
||||
{
|
||||
// UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0);
|
||||
table_add(result, member, line, status);
|
||||
|
||||
member = NULL;
|
||||
}
|
||||
else if(uprv_strcmp(subtag, "%%CollationBin")==0)
|
||||
{
|
||||
|
@ -959,12 +956,17 @@ addCollation(ParseState* state, struct SResource *result, const char *collation
|
|||
// all sub-elements of the collation table, including the Version.
|
||||
/* in order to achieve smaller data files, we can direct genrb */
|
||||
/* to omit collation rules */
|
||||
if(state->omitCollationRules) {
|
||||
bundle_closeString(state->bundle, member);
|
||||
} else {
|
||||
if(!state->omitCollationRules) {
|
||||
table_add(result, member, line, status);
|
||||
member = NULL;
|
||||
}
|
||||
}
|
||||
else // Just copy non-special items.
|
||||
{
|
||||
table_add(result, member, line, status);
|
||||
member = NULL;
|
||||
}
|
||||
res_close(member); // TODO: use LocalPointer
|
||||
if (U_FAILURE(*status))
|
||||
{
|
||||
res_close(result);
|
||||
|
@ -1066,6 +1068,11 @@ addCollation(ParseState* state, struct SResource *result, const char *collation
|
|||
return result;
|
||||
}
|
||||
|
||||
static UBool
|
||||
keepCollationType(const char *type) {
|
||||
return gIncludeUnihanColl || uprv_strcmp(type, "unihan") != 0;
|
||||
}
|
||||
|
||||
static struct SResource *
|
||||
parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
|
||||
{
|
||||
|
@ -1145,9 +1152,14 @@ parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n
|
|||
/* then, we cannot handle aliases */
|
||||
if(token == TOK_OPEN_BRACE) {
|
||||
token = getToken(state, &tokenValue, &comment, &line, status);
|
||||
collationRes = table_open(state->bundle, subtag, NULL, status);
|
||||
collationRes = addCollation(state, collationRes, subtag, startline, status); /* need to parse the collation data regardless */
|
||||
if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) {
|
||||
if (keepCollationType(subtag)) {
|
||||
collationRes = table_open(state->bundle, subtag, NULL, status);
|
||||
} else {
|
||||
collationRes = NULL;
|
||||
}
|
||||
// need to parse the collation data regardless
|
||||
collationRes = addCollation(state, collationRes, subtag, startline, status);
|
||||
if (collationRes != NULL) {
|
||||
table_add(result, collationRes, startline, status);
|
||||
}
|
||||
} else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2000-2012, International Business Machines
|
||||
* Copyright (C) 2000-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -25,6 +25,7 @@
|
|||
|
||||
#include "uarrsort.h"
|
||||
#include "uelement.h"
|
||||
#include "uhash.h"
|
||||
#include "uinvchar.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "unicode/utf16.h"
|
||||
|
@ -104,6 +105,19 @@ bundle_compactStrings(struct SRBRoot *bundle, UErrorCode *status);
|
|||
|
||||
/* Writing Functions */
|
||||
|
||||
/*
|
||||
* Preflight strings.
|
||||
* Find duplicates and count the total number of string code units
|
||||
* so that they can be written first to the 16-bit array,
|
||||
* for minimal string and container storage.
|
||||
*
|
||||
* We walk the final parse tree, rather than collecting this information while building it,
|
||||
* so that we need not deal with changes to the parse tree (especially removing resources).
|
||||
*/
|
||||
static void
|
||||
res_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet,
|
||||
UErrorCode *status);
|
||||
|
||||
/*
|
||||
* type_write16() functions write resource values into f16BitUnits
|
||||
* and determine the resource item word, if possible.
|
||||
|
@ -141,6 +155,92 @@ res_write(UNewDataMemory *mem, uint32_t *byteOffset,
|
|||
struct SRBRoot *bundle, struct SResource *res,
|
||||
UErrorCode *status);
|
||||
|
||||
static void
|
||||
string_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet,
|
||||
UErrorCode *status) {
|
||||
res->u.fString.fSame = uhash_get(stringSet, res);
|
||||
if (res->u.fString.fSame != NULL) {
|
||||
return; /* This is a duplicate of an earlier-visited string. */
|
||||
}
|
||||
/* Put this string into the set for finding duplicates. */
|
||||
uhash_put(stringSet, res, res, status);
|
||||
|
||||
if (bundle->fStringsForm != STRINGS_UTF16_V1) {
|
||||
const UChar *s = res->u.fString.fChars;
|
||||
int32_t len = res->u.fString.fLength;
|
||||
if (len <= MAX_IMPLICIT_STRING_LENGTH && !U16_IS_TRAIL(s[0]) && len == u_strlen(s)) {
|
||||
/*
|
||||
* This string will be stored without an explicit length.
|
||||
* Runtime will detect !U16_IS_TRAIL(s[0]) and call u_strlen().
|
||||
*/
|
||||
res->u.fString.fNumCharsForLength = 0;
|
||||
} else if (len <= 0x3ee) {
|
||||
res->u.fString.fNumCharsForLength = 1;
|
||||
} else if (len <= 0xfffff) {
|
||||
res->u.fString.fNumCharsForLength = 2;
|
||||
} else {
|
||||
res->u.fString.fNumCharsForLength = 3;
|
||||
}
|
||||
bundle->f16BitUnitsLength += res->u.fString.fNumCharsForLength + len + 1; /* +1 for the NUL */
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
array_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet,
|
||||
UErrorCode *status) {
|
||||
struct SResource *current;
|
||||
|
||||
if (U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
for (current = res->u.fArray.fFirst; current != NULL; current = current->fNext) {
|
||||
res_preflightStrings(bundle, current, stringSet, status);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
table_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet,
|
||||
UErrorCode *status) {
|
||||
struct SResource *current;
|
||||
|
||||
if (U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
for (current = res->u.fTable.fFirst; current != NULL; current = current->fNext) {
|
||||
res_preflightStrings(bundle, current, stringSet, status);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
res_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet,
|
||||
UErrorCode *status) {
|
||||
if (U_FAILURE(*status) || res == NULL) {
|
||||
return;
|
||||
}
|
||||
if (res->fRes != RES_BOGUS) {
|
||||
/*
|
||||
* The resource item word was already precomputed, which means
|
||||
* no further data needs to be written.
|
||||
* This might be an integer, or an empty string/binary/etc.
|
||||
*/
|
||||
return;
|
||||
}
|
||||
switch (res->fType) {
|
||||
case URES_STRING:
|
||||
string_preflightStrings(bundle, res, stringSet, status);
|
||||
break;
|
||||
case URES_ARRAY:
|
||||
array_preflightStrings(bundle, res, stringSet, status);
|
||||
break;
|
||||
case URES_TABLE:
|
||||
table_preflightStrings(bundle, res, stringSet, status);
|
||||
break;
|
||||
default:
|
||||
/* Neither a string nor a container. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static uint16_t *
|
||||
reserve16BitUnits(struct SRBRoot *bundle, int32_t length, UErrorCode *status) {
|
||||
if (U_FAILURE(*status)) {
|
||||
|
@ -221,10 +321,7 @@ string_write16(struct SRBRoot *bundle, struct SResource *res, UErrorCode *status
|
|||
struct SResource *same;
|
||||
if ((same = res->u.fString.fSame) != NULL) {
|
||||
/* This is a duplicate. */
|
||||
if (same->fRes == RES_BOGUS) {
|
||||
/* The original has not been visited yet. */
|
||||
string_write16(bundle, same, status);
|
||||
}
|
||||
assert(same->fRes != RES_BOGUS && same->fWritten);
|
||||
res->fRes = same->fRes;
|
||||
res->fWritten = same->fWritten;
|
||||
}
|
||||
|
@ -900,98 +997,43 @@ string_comp(const UElement key1, const UElement key2) {
|
|||
FALSE);
|
||||
}
|
||||
|
||||
struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
|
||||
static struct SResource *
|
||||
stringbase_open(struct SRBRoot *bundle, const char *tag, int8_t type,
|
||||
const UChar *value, int32_t len, const struct UString* comment,
|
||||
UErrorCode *status) {
|
||||
struct SResource *res = res_open(bundle, tag, comment, status);
|
||||
if (U_FAILURE(*status)) {
|
||||
return NULL;
|
||||
}
|
||||
res->fType = URES_STRING;
|
||||
res->fType = type;
|
||||
|
||||
if (len == 0 && gFormatVersion > 1) {
|
||||
res->u.fString.fChars = &gEmptyString;
|
||||
res->fRes = 0;
|
||||
res->fRes = URES_MAKE_EMPTY_RESOURCE(type);
|
||||
res->fWritten = TRUE;
|
||||
return res;
|
||||
}
|
||||
|
||||
res->u.fString.fLength = len;
|
||||
|
||||
if (gFormatVersion > 1) {
|
||||
/* check for duplicates */
|
||||
res->u.fString.fChars = (UChar *)value;
|
||||
if (bundle->fStringSet == NULL) {
|
||||
UErrorCode localStatus = U_ZERO_ERROR; /* if failure: just don't detect dups */
|
||||
bundle->fStringSet = uhash_open(string_hash, string_comp, string_comp, &localStatus);
|
||||
} else {
|
||||
res->u.fString.fSame = uhash_get(bundle->fStringSet, res);
|
||||
}
|
||||
}
|
||||
if (res->u.fString.fSame == NULL) {
|
||||
/* this is a new string */
|
||||
res->u.fString.fChars = (UChar *) uprv_malloc(sizeof(UChar) * (len + 1));
|
||||
|
||||
if (res->u.fString.fChars == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
uprv_free(res);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
uprv_memcpy(res->u.fString.fChars, value, sizeof(UChar) * len);
|
||||
res->u.fString.fChars[len] = 0;
|
||||
if (bundle->fStringSet != NULL) {
|
||||
/* put it into the set for finding duplicates */
|
||||
uhash_put(bundle->fStringSet, res, res, status);
|
||||
}
|
||||
|
||||
if (bundle->fStringsForm != STRINGS_UTF16_V1) {
|
||||
if (len <= MAX_IMPLICIT_STRING_LENGTH && !U16_IS_TRAIL(value[0]) && len == u_strlen(value)) {
|
||||
/*
|
||||
* This string will be stored without an explicit length.
|
||||
* Runtime will detect !U16_IS_TRAIL(value[0]) and call u_strlen().
|
||||
*/
|
||||
res->u.fString.fNumCharsForLength = 0;
|
||||
} else if (len <= 0x3ee) {
|
||||
res->u.fString.fNumCharsForLength = 1;
|
||||
} else if (len <= 0xfffff) {
|
||||
res->u.fString.fNumCharsForLength = 2;
|
||||
} else {
|
||||
res->u.fString.fNumCharsForLength = 3;
|
||||
}
|
||||
bundle->f16BitUnitsLength += res->u.fString.fNumCharsForLength + len + 1; /* +1 for the NUL */
|
||||
}
|
||||
} else {
|
||||
/* this is a duplicate of fSame */
|
||||
struct SResource *same = res->u.fString.fSame;
|
||||
res->u.fString.fChars = same->u.fString.fChars;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/* TODO: make alias_open and string_open use the same code */
|
||||
struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
|
||||
struct SResource *res = res_open(bundle, tag, comment, status);
|
||||
if (U_FAILURE(*status)) {
|
||||
return NULL;
|
||||
}
|
||||
res->fType = URES_ALIAS;
|
||||
if (len == 0 && gFormatVersion > 1) {
|
||||
res->u.fString.fChars = &gEmptyString;
|
||||
res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_ALIAS);
|
||||
res->fWritten = TRUE;
|
||||
return res;
|
||||
}
|
||||
|
||||
res->u.fString.fLength = len;
|
||||
res->u.fString.fChars = (UChar *) uprv_malloc(sizeof(UChar) * (len + 1));
|
||||
res->u.fString.fChars = (UChar *) uprv_malloc(sizeof(UChar) * (len + 1));
|
||||
if (res->u.fString.fChars == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
uprv_free(res);
|
||||
return NULL;
|
||||
}
|
||||
uprv_memcpy(res->u.fString.fChars, value, sizeof(UChar) * (len + 1));
|
||||
uprv_memcpy(res->u.fString.fChars, value, sizeof(UChar) * len);
|
||||
res->u.fString.fChars[len] = 0;
|
||||
return res;
|
||||
}
|
||||
|
||||
struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
|
||||
return stringbase_open(bundle, tag, URES_STRING, value, len, comment, status);
|
||||
}
|
||||
|
||||
struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
|
||||
return stringbase_open(bundle, tag, URES_ALIAS, value, len, comment, status);
|
||||
}
|
||||
|
||||
|
||||
struct SResource* intvector_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
|
||||
struct SResource *res = res_open(bundle, tag, comment, status);
|
||||
|
@ -1142,9 +1184,7 @@ static void array_close(struct SResource *array) {
|
|||
|
||||
static void string_close(struct SResource *string) {
|
||||
if (string->u.fString.fChars != NULL &&
|
||||
string->u.fString.fChars != &gEmptyString &&
|
||||
string->u.fString.fSame == NULL
|
||||
) {
|
||||
string->u.fString.fChars != &gEmptyString) {
|
||||
uprv_free(string->u.fString.fChars);
|
||||
string->u.fString.fChars =NULL;
|
||||
}
|
||||
|
@ -1218,18 +1258,10 @@ void bundle_close(struct SRBRoot *bundle, UErrorCode *status) {
|
|||
uprv_free(bundle->fLocale);
|
||||
uprv_free(bundle->fKeys);
|
||||
uprv_free(bundle->fKeyMap);
|
||||
uhash_close(bundle->fStringSet);
|
||||
uprv_free(bundle->f16BitUnits);
|
||||
uprv_free(bundle);
|
||||
}
|
||||
|
||||
void bundle_closeString(struct SRBRoot *bundle, struct SResource *string) {
|
||||
if (bundle->fStringSet != NULL) {
|
||||
uhash_remove(bundle->fStringSet, string);
|
||||
}
|
||||
string_close(string);
|
||||
}
|
||||
|
||||
/* Adding Functions */
|
||||
void table_add(struct SResource *table, struct SResource *res, int linenumber, UErrorCode *status) {
|
||||
struct SResource *current = NULL;
|
||||
|
@ -1664,14 +1696,22 @@ string_writeUTF16v2(struct SRBRoot *bundle, struct SResource *res, int32_t utf16
|
|||
|
||||
static void
|
||||
bundle_compactStrings(struct SRBRoot *bundle, UErrorCode *status) {
|
||||
UHashtable *stringSet;
|
||||
if (gFormatVersion > 1) {
|
||||
stringSet = uhash_open(string_hash, string_comp, string_comp, status);
|
||||
res_preflightStrings(bundle, bundle->fRoot, stringSet, status);
|
||||
} else {
|
||||
stringSet = NULL;
|
||||
}
|
||||
if (U_FAILURE(*status)) {
|
||||
uhash_close(stringSet);
|
||||
return;
|
||||
}
|
||||
switch(bundle->fStringsForm) {
|
||||
case STRINGS_UTF16_V2:
|
||||
if (bundle->f16BitUnitsLength > 0) {
|
||||
struct SResource **array;
|
||||
int32_t count = uhash_count(bundle->fStringSet);
|
||||
int32_t count = uhash_count(stringSet);
|
||||
int32_t i, pos;
|
||||
/*
|
||||
* Allocate enough space for the initial NUL and the UTF-16 v2 strings,
|
||||
|
@ -1685,6 +1725,7 @@ bundle_compactStrings(struct SRBRoot *bundle, UErrorCode *status) {
|
|||
uprv_free(bundle->f16BitUnits);
|
||||
bundle->f16BitUnits = NULL;
|
||||
uprv_free(array);
|
||||
uhash_close(stringSet);
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
|
@ -1694,7 +1735,7 @@ bundle_compactStrings(struct SRBRoot *bundle, UErrorCode *status) {
|
|||
utf16Length = 1;
|
||||
++bundle->f16BitUnitsLength;
|
||||
for (pos = -1, i = 0; i < count; ++i) {
|
||||
array[i] = (struct SResource *)uhash_nextElement(bundle->fStringSet, &pos)->key.pointer;
|
||||
array[i] = (struct SResource *)uhash_nextElement(stringSet, &pos)->key.pointer;
|
||||
}
|
||||
/* Sort the strings so that each one is immediately followed by all of its suffixes. */
|
||||
uprv_sortArray(array, count, (int32_t)sizeof(struct SResource **),
|
||||
|
@ -1769,4 +1810,5 @@ bundle_compactStrings(struct SRBRoot *bundle, UErrorCode *status) {
|
|||
default:
|
||||
break;
|
||||
}
|
||||
uhash_close(stringSet);
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2000-2011, International Business Machines
|
||||
* Copyright (C) 2000-2014, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -29,7 +29,6 @@
|
|||
#include "cstring.h"
|
||||
#include "unewdata.h"
|
||||
#include "ustr.h"
|
||||
#include "uhash.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
|
@ -54,7 +53,6 @@ struct SRBRoot {
|
|||
int32_t fKeysCount;
|
||||
int32_t fLocalKeyLimit; /* key offset < limit fits into URES_TABLE */
|
||||
|
||||
UHashtable *fStringSet;
|
||||
uint16_t *f16BitUnits;
|
||||
int32_t f16BitUnitsCapacity;
|
||||
int32_t f16BitUnitsLength;
|
||||
|
@ -132,13 +130,6 @@ struct SResString {
|
|||
|
||||
struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Remove a string from a bundle and close (delete) it.
|
||||
* The string must not have been added to a table or array yet.
|
||||
* This function only undoes what string_open() did.
|
||||
*/
|
||||
void bundle_closeString(struct SRBRoot *bundle, struct SResource *string);
|
||||
|
||||
struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status);
|
||||
|
||||
struct SResIntVector {
|
||||
|
|
Loading…
Add table
Reference in a new issue