mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 15:05:53 +00:00
ICU-8972 adjust property names builder to simpler pnames_data.h
X-SVN-Rev: 31165
This commit is contained in:
parent
05d42d4ed3
commit
f3fd941998
3 changed files with 1164 additions and 3039 deletions
File diff suppressed because it is too large
Load diff
|
@ -21,6 +21,7 @@
|
|||
#include "genprops.h"
|
||||
#include "propname.h"
|
||||
#include "toolutil.h"
|
||||
#include "uhash.h"
|
||||
#include "uinvchar.h"
|
||||
#include "unewdata.h"
|
||||
#include "uvectr32.h"
|
||||
|
@ -45,144 +46,109 @@ U_NAMESPACE_USE
|
|||
// data.
|
||||
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "unicode/unorm2.h"
|
||||
#include "unicode/uscript.h"
|
||||
|
||||
class AliasName {
|
||||
// Dilemma: We want to use MAX_ALIASES to define fields in the Value class.
|
||||
// However, we need to define the class before including the data header
|
||||
// and we can use MAX_ALIASES only after including it.
|
||||
// So we define a second constant and at runtime check that it's >=MAX_ALIASES.
|
||||
static const int32_t VALUE_MAX_ALIASES=4;
|
||||
|
||||
class Value {
|
||||
public:
|
||||
const char* str;
|
||||
int32_t index;
|
||||
char normalized[64];
|
||||
|
||||
AliasName(const char* str, int32_t index);
|
||||
|
||||
int compare(const AliasName& other) const {
|
||||
return uprv_strcmp(normalized, other.normalized);
|
||||
}
|
||||
|
||||
UBool operator==(const AliasName& other) const {
|
||||
return compare(other) == 0;
|
||||
}
|
||||
|
||||
UBool operator!=(const AliasName& other) const {
|
||||
return compare(other) != 0;
|
||||
}
|
||||
};
|
||||
|
||||
AliasName::AliasName(const char* _str,
|
||||
int32_t _index) :
|
||||
str(_str),
|
||||
index(_index)
|
||||
{
|
||||
// Build the normalized form of the alias.
|
||||
const char *s=str;
|
||||
char c;
|
||||
int32_t i=0;
|
||||
while((c=*s++)!=0) {
|
||||
// Ignore delimiters '-', '_', and ASCII White_Space.
|
||||
if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
|
||||
continue;
|
||||
Value(int32_t enumValue, const char *joinedAliases)
|
||||
: enumValue(enumValue), joinedAliases(joinedAliases), count(0) {
|
||||
if(uprv_strlen(joinedAliases)>=LENGTHOF(aliasesBuffer)) {
|
||||
fprintf(stderr,
|
||||
"genprops error: pnamesbuilder.cpp Value::Value(%ld, \"%s\"): "
|
||||
"joined aliases too long: make Value::aliasesBuffer[] larger, "
|
||||
"at least %ld\n",
|
||||
(long)enumValue, joinedAliases, uprv_strlen(joinedAliases)+1);
|
||||
exit(U_BUFFER_OVERFLOW_ERROR);
|
||||
}
|
||||
normalized[i++]=uprv_tolower(c);
|
||||
// Copy the space-separated aliases into NUL-separated ones and count them.
|
||||
// Write a normalized version of each one.
|
||||
const char *j=joinedAliases;
|
||||
char *a=aliasesBuffer;
|
||||
char *n=normalizedBuffer;
|
||||
char c;
|
||||
do {
|
||||
aliases[count]=a;
|
||||
normalized[count++]=n;
|
||||
char c;
|
||||
while((c=*j)!=' ' && c!=0) {
|
||||
*a++=c;
|
||||
// Ignore delimiters '-' and '_'.
|
||||
if(!(c=='-' || c=='_')) {
|
||||
*n++=uprv_tolower(c);
|
||||
}
|
||||
++j;
|
||||
}
|
||||
*a++=0;
|
||||
*n++=0;
|
||||
} while(c!=0);
|
||||
}
|
||||
normalized[i]=0;
|
||||
if(i>=LENGTHOF(normalized)) {
|
||||
fprintf(stderr,
|
||||
"Error: Property (value) alias '%s' results in "
|
||||
"too-long normalized string (length %d)\n",
|
||||
str, (int)i);
|
||||
exit(U_BUFFER_OVERFLOW_ERROR);
|
||||
|
||||
/**
|
||||
* Writes at most MAX_ALIASES pointers for unique normalized aliases
|
||||
* (no empty strings) to dest and returns how many there are.
|
||||
*/
|
||||
int32_t getUniqueNormalizedAliases(const char *dest[]) const {
|
||||
int32_t numUnique=0;
|
||||
for(int32_t i=0; i<count; ++i) {
|
||||
const char *s=normalized[i];
|
||||
if(*s!=0) { // Omit empty strings.
|
||||
for(int32_t j=0;; ++j) {
|
||||
if(j==numUnique) {
|
||||
// s is a new unique alias.
|
||||
dest[numUnique++]=s;
|
||||
break;
|
||||
}
|
||||
if(0==uprv_strcmp(s, dest[j])) {
|
||||
// s is equal or equivalent to an earlier alias.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return numUnique;
|
||||
}
|
||||
}
|
||||
|
||||
class Alias {
|
||||
public:
|
||||
int32_t enumValue;
|
||||
int32_t nameGroupIndex;
|
||||
|
||||
Alias(int32_t enumValue, int32_t nameGroupIndex);
|
||||
|
||||
int32_t getUniqueNames(int32_t* nameGroupIndices) const;
|
||||
int32_t enumValue;
|
||||
const char *joinedAliases;
|
||||
char aliasesBuffer[100];
|
||||
char normalizedBuffer[100]; // Same capacity as aliasesBuffer!
|
||||
const char *aliases[VALUE_MAX_ALIASES];
|
||||
const char *normalized[VALUE_MAX_ALIASES];
|
||||
int32_t count;
|
||||
};
|
||||
|
||||
Alias::Alias(int32_t anEnumValue, int32_t aNameGroupIndex) :
|
||||
enumValue(anEnumValue),
|
||||
nameGroupIndex(aNameGroupIndex)
|
||||
{
|
||||
}
|
||||
|
||||
class Property : public Alias {
|
||||
class Property : public Value {
|
||||
public:
|
||||
int32_t valueCount;
|
||||
const Alias* valueList;
|
||||
Property(int32_t enumValue, const char *joinedAliases,
|
||||
const Value *values, int32_t valueCount)
|
||||
: Value(enumValue, joinedAliases),
|
||||
values(values), valueCount(valueCount) {}
|
||||
|
||||
Property(int32_t enumValue,
|
||||
int32_t nameGroupIndex,
|
||||
int32_t valueCount,
|
||||
const Alias* valueList);
|
||||
const Value *values;
|
||||
int32_t valueCount;
|
||||
};
|
||||
|
||||
Property::Property(int32_t _enumValue,
|
||||
int32_t _nameGroupIndex,
|
||||
int32_t _valueCount,
|
||||
const Alias* _valueList) :
|
||||
Alias(_enumValue, _nameGroupIndex),
|
||||
valueCount(_valueCount),
|
||||
valueList(_valueList)
|
||||
{
|
||||
}
|
||||
|
||||
// *** Include the data header ***
|
||||
#include "pnames_data.h"
|
||||
|
||||
/* return a list of unique names, not including "", for this property
|
||||
* @param stringIndices array of at least MAX_NAMES_PER_GROUP
|
||||
* elements, will be filled with indices into STRING_TABLE
|
||||
* @return number of indices, >= 1
|
||||
*/
|
||||
int32_t Alias::getUniqueNames(int32_t* stringIndices) const {
|
||||
int32_t count = 0;
|
||||
int32_t i = nameGroupIndex;
|
||||
UBool done = FALSE;
|
||||
while (!done) {
|
||||
int32_t j = NAME_GROUP[i++];
|
||||
if (j < 0) {
|
||||
done = TRUE;
|
||||
j = -j;
|
||||
}
|
||||
if (j == 0) continue; // omit "" entries
|
||||
UBool dupe = FALSE;
|
||||
for (int32_t k=0; k<count; ++k) {
|
||||
if (stringIndices[k] == j) {
|
||||
dupe = TRUE;
|
||||
break;
|
||||
}
|
||||
// also do a string check for things like "age|Age"
|
||||
if (STRING_TABLE[stringIndices[k]] == STRING_TABLE[j]) {
|
||||
//printf("Found dupe %s|%s\n",
|
||||
// STRING_TABLE[stringIndices[k]].str,
|
||||
// STRING_TABLE[j].str);
|
||||
dupe = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (dupe) continue; // omit duplicates
|
||||
stringIndices[count++] = j;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
// END DATA
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
class PNamesBuilderImpl;
|
||||
|
||||
class PNamesPropertyNames : public PropertyNames {
|
||||
public:
|
||||
PNamesPropertyNames(const PNamesBuilderImpl &pnwi)
|
||||
: impl(pnwi), valueMaps(NULL), bytesTries(NULL) {}
|
||||
void init();
|
||||
PNamesPropertyNames()
|
||||
: valueMaps(NULL), bytesTries(NULL) {}
|
||||
void init(const int32_t *vm, const uint8_t *bt) {
|
||||
valueMaps=vm;
|
||||
bytesTries=bt;
|
||||
}
|
||||
virtual int32_t getPropertyEnum(const char *name) const;
|
||||
virtual int32_t getPropertyValueEnum(int32_t property, const char *name) const;
|
||||
private:
|
||||
|
@ -190,7 +156,6 @@ private:
|
|||
UBool containsName(BytesTrie &trie, const char *name) const;
|
||||
int32_t getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) const;
|
||||
|
||||
const PNamesBuilderImpl &impl;
|
||||
const int32_t *valueMaps;
|
||||
const uint8_t *bytesTries;
|
||||
};
|
||||
|
@ -199,18 +164,32 @@ class PNamesBuilderImpl : public PNamesBuilder {
|
|||
public:
|
||||
PNamesBuilderImpl(UErrorCode &errorCode)
|
||||
: valueMaps(errorCode), btb(errorCode), maxNameLength(0),
|
||||
pnames(*this) {}
|
||||
nameGroupToOffset(NULL) {}
|
||||
|
||||
~PNamesBuilderImpl() {
|
||||
uhash_close(nameGroupToOffset);
|
||||
}
|
||||
|
||||
virtual void build(UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) { return; }
|
||||
if(VALUE_MAX_ALIASES<MAX_ALIASES) {
|
||||
fprintf(stderr,
|
||||
"genprops error: pnamesbuilder.cpp VALUE_MAX_ALIASES=%d<%d=MAX_ALIASES -- "
|
||||
"need to change VALUE_MAX_ALIASES to at least %d\n",
|
||||
(int)VALUE_MAX_ALIASES, (int)MAX_ALIASES, (int)MAX_ALIASES);
|
||||
errorCode=U_INTERNAL_PROGRAM_ERROR;
|
||||
return;
|
||||
}
|
||||
nameGroupToOffset=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
|
||||
// Build main property aliases value map at value map offset 0,
|
||||
// so that we need not store another offset for it.
|
||||
UVector32 propEnums(errorCode);
|
||||
int32_t propIndex;
|
||||
for(propIndex=0; propIndex<PROPERTY_COUNT; ++propIndex) {
|
||||
propEnums.sortedInsert(PROPERTY[propIndex].enumValue, errorCode);
|
||||
for(propIndex=0; propIndex<PROPERTIES_COUNT; ++propIndex) {
|
||||
propEnums.sortedInsert(PROPERTIES[propIndex].enumValue, errorCode);
|
||||
}
|
||||
int32_t ranges[10][2];
|
||||
int32_t numPropRanges=uprv_makeDenseRanges(propEnums.getBuffer(), PROPERTY_COUNT, 0x100,
|
||||
int32_t numPropRanges=uprv_makeDenseRanges(propEnums.getBuffer(), PROPERTIES_COUNT, 0x100,
|
||||
ranges, LENGTHOF(ranges));
|
||||
valueMaps.addElement(numPropRanges, errorCode);
|
||||
int32_t i, j;
|
||||
|
@ -226,48 +205,48 @@ public:
|
|||
|
||||
// Build the properties trie first, at BytesTrie offset 0,
|
||||
// so that we need not store another offset for it.
|
||||
buildAliasesBytesTrie(PROPERTY, PROPERTY_COUNT, errorCode);
|
||||
buildPropertiesBytesTrie(PROPERTIES, PROPERTIES_COUNT, errorCode);
|
||||
|
||||
// Build the name group for the first property, at nameGroups offset 0.
|
||||
// Name groups for *value* aliases must not start at offset 0
|
||||
// because that is a missing-value marker for sparse value ranges.
|
||||
setPropertyInt(PROPERTY[0].enumValue, 0,
|
||||
writeNameGroup(PROPERTY[0], errorCode));
|
||||
setPropertyInt(PROPERTIES[0].enumValue, 0,
|
||||
writeValueAliases(PROPERTIES[0], errorCode));
|
||||
|
||||
// Build the known-repeated binary properties once.
|
||||
int32_t binPropsValueMapOffset=valueMaps.size();
|
||||
int32_t bytesTrieOffset=buildAliasesBytesTrie(VALUES_binprop, VALUES_binprop_COUNT, errorCode);
|
||||
int32_t bytesTrieOffset=buildValuesBytesTrie(VALUES_binprop, VALUES_binprop_COUNT, errorCode);
|
||||
valueMaps.addElement(bytesTrieOffset, errorCode);
|
||||
buildValueMap(VALUES_binprop, VALUES_binprop_COUNT, errorCode);
|
||||
|
||||
// Build the known-repeated canonical combining class properties once.
|
||||
int32_t cccValueMapOffset=valueMaps.size();
|
||||
bytesTrieOffset=buildAliasesBytesTrie(VALUES_ccc, VALUES_ccc_COUNT, errorCode);
|
||||
bytesTrieOffset=buildValuesBytesTrie(VALUES_ccc, VALUES_ccc_COUNT, errorCode);
|
||||
valueMaps.addElement(bytesTrieOffset, errorCode);
|
||||
buildValueMap(VALUES_ccc, VALUES_ccc_COUNT, errorCode);
|
||||
|
||||
// Build the rest of the data.
|
||||
for(propIndex=0; propIndex<PROPERTY_COUNT; ++propIndex) {
|
||||
for(propIndex=0; propIndex<PROPERTIES_COUNT; ++propIndex) {
|
||||
if(propIndex>0) {
|
||||
// writeNameGroup(PROPERTY[0], ...) already done
|
||||
setPropertyInt(PROPERTY[propIndex].enumValue, 0,
|
||||
writeNameGroup(PROPERTY[propIndex], errorCode));
|
||||
// writeValueAliases(PROPERTIES[0], ...) already done
|
||||
setPropertyInt(PROPERTIES[propIndex].enumValue, 0,
|
||||
writeValueAliases(PROPERTIES[propIndex], errorCode));
|
||||
}
|
||||
int32_t valueCount=PROPERTY[propIndex].valueCount;
|
||||
int32_t valueCount=PROPERTIES[propIndex].valueCount;
|
||||
if(valueCount>0) {
|
||||
int32_t valueMapOffset;
|
||||
const Alias *valueList=PROPERTY[propIndex].valueList;
|
||||
if(valueList==VALUES_binprop) {
|
||||
const Value *values=PROPERTIES[propIndex].values;
|
||||
if(values==VALUES_binprop) {
|
||||
valueMapOffset=binPropsValueMapOffset;
|
||||
} else if(valueList==VALUES_ccc || valueList==VALUES_lccc || valueList==VALUES_tccc) {
|
||||
} else if(values==VALUES_ccc || values==VALUES_lccc || values==VALUES_tccc) {
|
||||
valueMapOffset=cccValueMapOffset;
|
||||
} else {
|
||||
valueMapOffset=valueMaps.size();
|
||||
bytesTrieOffset=buildAliasesBytesTrie(valueList, valueCount, errorCode);
|
||||
bytesTrieOffset=buildValuesBytesTrie(values, valueCount, errorCode);
|
||||
valueMaps.addElement(bytesTrieOffset, errorCode);
|
||||
buildValueMap(valueList, valueCount, errorCode);
|
||||
buildValueMap(values, valueCount, errorCode);
|
||||
}
|
||||
setPropertyInt(PROPERTY[propIndex].enumValue, 1, valueMapOffset);
|
||||
setPropertyInt(PROPERTIES[propIndex].enumValue, 1, valueMapOffset);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -299,20 +278,24 @@ public:
|
|||
virtual void writeCSourceFile(const char *path, UErrorCode &errorCode);
|
||||
virtual void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode);
|
||||
|
||||
int32_t writeNameGroup(const Alias &alias, UErrorCode &errorCode) {
|
||||
int32_t nameOffset=nameGroups.length();
|
||||
// Count how many aliases this group has.
|
||||
int32_t i=alias.nameGroupIndex;
|
||||
int32_t nameIndex;
|
||||
do { nameIndex=NAME_GROUP[i++]; } while(nameIndex>=0);
|
||||
int32_t count=i-alias.nameGroupIndex;
|
||||
int32_t writeValueAliases(const Value &value, UErrorCode &errorCode) {
|
||||
int32_t nameOffset=uhash_geti(nameGroupToOffset, (void *)value.joinedAliases);
|
||||
if(nameOffset!=0) {
|
||||
printf("* duplicate joinedAliases: \"%s\"\n", value.joinedAliases);
|
||||
// The same list of aliases has been written already.
|
||||
return nameOffset-1; // Was incremented to reserve 0 for "not found".
|
||||
}
|
||||
// Write this not-yet-seen list of aliases.
|
||||
nameOffset=nameGroups.length();
|
||||
uhash_puti(nameGroupToOffset, (void *)value.joinedAliases,
|
||||
nameOffset+1, &errorCode);
|
||||
// The first byte tells us how many aliases there are.
|
||||
// We use only values 0..0x1f in the first byte because when we write
|
||||
// the name groups as an invariant-character string into a source file,
|
||||
// those values (C0 control codes) are written as numbers rather than as characters.
|
||||
int32_t count=value.count;
|
||||
if(count>=0x20) {
|
||||
fprintf(stderr, "Error: Too many aliases in the group with index %d\n",
|
||||
(int)alias.nameGroupIndex);
|
||||
fprintf(stderr, "Error: Too many aliases in \"%s\"\n", value.joinedAliases);
|
||||
exit(U_INDEX_OUTOFBOUNDS_ERROR);
|
||||
}
|
||||
nameGroups.append((char)count, errorCode);
|
||||
|
@ -321,30 +304,27 @@ public:
|
|||
// In such a case, we could set a flag and omit the duplicate,
|
||||
// but that would save only about 1.35% of total data size (Unicode 6.0/ICU 4.6)
|
||||
// which is not worth the trouble.
|
||||
i=alias.nameGroupIndex;
|
||||
int32_t n;
|
||||
do {
|
||||
nameIndex=n=NAME_GROUP[i++];
|
||||
if(nameIndex<0) {
|
||||
nameIndex=-nameIndex;
|
||||
}
|
||||
const char *s=STRING_TABLE[nameIndex].str;
|
||||
// Note: In Unicode 6.1, there are more duplicates due to newly added
|
||||
// short names for blocks and other properties.
|
||||
// It might now be worth changing the data structure.
|
||||
for(int32_t i=0; i<count; ++i) {
|
||||
const char *s=value.aliases[i];
|
||||
int32_t sLength=uprv_strlen(s)+1;
|
||||
if(sLength>maxNameLength) {
|
||||
maxNameLength=sLength;
|
||||
}
|
||||
nameGroups.append(s, sLength, errorCode); // including NUL
|
||||
} while(n>=0);
|
||||
}
|
||||
return nameOffset;
|
||||
}
|
||||
|
||||
void buildValueMap(const Alias aliases[], int32_t length, UErrorCode &errorCode) {
|
||||
void buildValueMap(const Value values[], int32_t length, UErrorCode &errorCode) {
|
||||
UVector32 sortedValues(errorCode);
|
||||
UVector32 nameOffsets(errorCode); // Parallel to aliases[].
|
||||
UVector32 nameOffsets(errorCode); // Parallel to values[].
|
||||
int32_t i;
|
||||
for(i=0; i<length; ++i) {
|
||||
sortedValues.sortedInsert(aliases[i].enumValue, errorCode);
|
||||
nameOffsets.addElement(writeNameGroup(aliases[i], errorCode), errorCode);
|
||||
sortedValues.sortedInsert(values[i].enumValue, errorCode);
|
||||
nameOffsets.addElement(writeValueAliases(values[i], errorCode), errorCode);
|
||||
}
|
||||
int32_t ranges[10][2];
|
||||
int32_t numRanges=uprv_makeDenseRanges(sortedValues.getBuffer(), length, 0xe0,
|
||||
|
@ -359,8 +339,8 @@ public:
|
|||
// in which case we write a nameOffset of 0.
|
||||
// Real nameOffsets for property values are never 0.
|
||||
// (The first name group is for the first property name.)
|
||||
int32_t aliasIndex=aliasesIndexOf(aliases, length, j);
|
||||
int32_t nameOffset= aliasIndex>=0 ? nameOffsets.elementAti(aliasIndex) : 0;
|
||||
int32_t valueIndex=valuesIndexOf(values, length, j);
|
||||
int32_t nameOffset= valueIndex>=0 ? nameOffsets.elementAti(valueIndex) : 0;
|
||||
valueMaps.addElement(nameOffset, errorCode);
|
||||
}
|
||||
}
|
||||
|
@ -373,15 +353,15 @@ public:
|
|||
for(i=0; i<length; ++i) {
|
||||
valueMaps.addElement(
|
||||
nameOffsets.elementAti(
|
||||
aliasesIndexOf(aliases, length,
|
||||
valuesIndexOf(values, length,
|
||||
sortedValues.elementAti(i))), errorCode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int32_t aliasesIndexOf(const Alias aliases[], int32_t length, int32_t value) {
|
||||
static int32_t valuesIndexOf(const Value values[], int32_t length, int32_t value) {
|
||||
for(int32_t i=0;; ++i) {
|
||||
if(aliases[i].enumValue==value) {
|
||||
if(values[i].enumValue==value) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
@ -403,32 +383,32 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
void addAliasToBytesTrie(const Alias &alias, UErrorCode &errorCode) {
|
||||
int32_t names[MAX_NAMES_PER_GROUP];
|
||||
int32_t numNames=alias.getUniqueNames(names);
|
||||
for(int32_t i=0; i<numNames; ++i) {
|
||||
// printf("* adding %s: 0x%lx\n", STRING_TABLE[names[i]].normalized, (long)alias.enumValue);
|
||||
btb.add(STRING_TABLE[names[i]].normalized, alias.enumValue, errorCode);
|
||||
void addValueToBytesTrie(const Value &value, UErrorCode &errorCode) {
|
||||
const char *aliases[MAX_ALIASES];
|
||||
int32_t numAliases=value.getUniqueNormalizedAliases(aliases);
|
||||
for(int32_t i=0; i<numAliases; ++i) {
|
||||
btb.add(aliases[i], value.enumValue, errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
int32_t buildAliasesBytesTrie(const Alias aliases[], int32_t length, UErrorCode &errorCode) {
|
||||
int32_t buildValuesBytesTrie(const Value values[], int32_t length, UErrorCode &errorCode) {
|
||||
btb.clear();
|
||||
for(int32_t i=0; i<length; ++i) {
|
||||
addAliasToBytesTrie(aliases[i], errorCode);
|
||||
addValueToBytesTrie(values[i], errorCode);
|
||||
}
|
||||
int32_t bytesTrieOffset=bytesTries.length();
|
||||
bytesTries.append(btb.buildStringPiece(USTRINGTRIE_BUILD_SMALL, errorCode), errorCode);
|
||||
return bytesTrieOffset;
|
||||
}
|
||||
|
||||
// Overload for Property. Property is-an Alias, but when we iterate through
|
||||
// the array we need to increment by the right object size.
|
||||
int32_t buildAliasesBytesTrie(const Property aliases[], int32_t length,
|
||||
UErrorCode &errorCode) {
|
||||
// Variant of buildValuesBytesTrie() for Property.
|
||||
// Property is-a Value, and the source code is the same,
|
||||
// but when we iterate through the array we need to increment by the right object size.
|
||||
int32_t buildPropertiesBytesTrie(const Property properties[], int32_t length,
|
||||
UErrorCode &errorCode) {
|
||||
btb.clear();
|
||||
for(int32_t i=0; i<length; ++i) {
|
||||
addAliasToBytesTrie(aliases[i], errorCode);
|
||||
addValueToBytesTrie(properties[i], errorCode);
|
||||
}
|
||||
int32_t bytesTrieOffset=bytesTries.length();
|
||||
bytesTries.append(btb.buildStringPiece(USTRINGTRIE_BUILD_SMALL, errorCode), errorCode);
|
||||
|
@ -436,10 +416,12 @@ public:
|
|||
}
|
||||
|
||||
virtual const PropertyNames *getPropertyNames() {
|
||||
pnames.init();
|
||||
pnames.init(valueMaps.getBuffer(),
|
||||
reinterpret_cast<const uint8_t *>(bytesTries.data()));
|
||||
return &pnames;
|
||||
}
|
||||
|
||||
private:
|
||||
int32_t indexes[PropNameData::IX_COUNT];
|
||||
UVector32 valueMaps;
|
||||
BytesTrieBuilder btb;
|
||||
|
@ -447,6 +429,7 @@ public:
|
|||
CharString nameGroups;
|
||||
int32_t maxNameLength;
|
||||
PNamesPropertyNames pnames;
|
||||
UHashtable *nameGroupToOffset;
|
||||
};
|
||||
|
||||
/* UDataInfo cf. udata.h */
|
||||
|
@ -461,7 +444,7 @@ static const UDataInfo dataInfo = {
|
|||
|
||||
{ PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3 },
|
||||
{ 2, 0, 0, 0 }, /* formatVersion */
|
||||
{ VERSION_0, VERSION_1, VERSION_2, VERSION_3 } /* Unicode version */
|
||||
UNICODE_VERSION
|
||||
};
|
||||
|
||||
void
|
||||
|
@ -537,11 +520,6 @@ createPNamesBuilder(UErrorCode &errorCode) {
|
|||
// Note: The following is a partial copy of runtime propname.cpp code.
|
||||
// Consider changing that into a semi-public API to avoid duplication.
|
||||
|
||||
void PNamesPropertyNames::init() {
|
||||
valueMaps=impl.valueMaps.getBuffer();
|
||||
bytesTries=reinterpret_cast<const uint8_t *>(impl.bytesTries.data());
|
||||
}
|
||||
|
||||
int32_t PNamesPropertyNames::findProperty(int32_t property) const {
|
||||
int32_t i=1; // valueMaps index, initially after numRanges
|
||||
for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
|
||||
|
|
|
@ -1676,12 +1676,18 @@ def WritePNamesDataHeader(out_path):
|
|||
* others. All Rights Reserved.
|
||||
*
|
||||
* machine-generated by: icu/tools/unicode/py/preparseucd.py
|
||||
*
|
||||
* Unicode """ + _ucd_version + """
|
||||
*/
|
||||
|
||||
""")
|
||||
|
||||
version = _ucd_version.split('.')
|
||||
while len(version) < 4: version.append("0")
|
||||
out_file.write("#define UNICODE_VERSION { %s }\n\n" % ", ".join(version))
|
||||
|
||||
# Count the maximum number of aliases for any property or value.
|
||||
# We write the final value at the end.
|
||||
max_aliases = max(len(_binary_values["N"]), len(_binary_values["Y"]))
|
||||
|
||||
# Write an array of "binprop" Value object initializers
|
||||
# with the value aliases shared among all binary properties.
|
||||
out_file.write("const int32_t VALUES_binprop_COUNT = 2;\n\n")
|
||||
|
@ -1693,6 +1699,9 @@ def WritePNamesDataHeader(out_path):
|
|||
# For each property with named values, write an array of
|
||||
# Value object initializers with the value enum and the aliases.
|
||||
for (p_enum, pname, values) in _icu_properties:
|
||||
prop = _properties[pname]
|
||||
aliases = prop[1]
|
||||
if len(aliases) > max_aliases: max_aliases = len(aliases)
|
||||
if not values: continue
|
||||
out_file.write("const int32_t VALUES_%s_COUNT = %d;\n\n" %
|
||||
(pname, len(values)))
|
||||
|
@ -1702,6 +1711,7 @@ def WritePNamesDataHeader(out_path):
|
|||
# ccc, lccc, tccc: Omit the numeric strings from the aliases.
|
||||
# (See the comment about ccc in the PropertyValueAliases.txt header.)
|
||||
if pname.endswith("ccc"): aliases = aliases[1:]
|
||||
if len(aliases) > max_aliases: max_aliases = len(aliases)
|
||||
cast = "(int32_t)" if pname == "gcm" else ""
|
||||
out_file.write(' Value(%s%s, "%s"),\n' %
|
||||
(cast, v_enum, " ".join(aliases)))
|
||||
|
@ -1723,7 +1733,9 @@ def WritePNamesDataHeader(out_path):
|
|||
else:
|
||||
out_file.write(' Property(%s, "%s", NULL, 0),\n' %
|
||||
(enum, " ".join(aliases)))
|
||||
out_file.write("};\n")
|
||||
out_file.write("};\n\n")
|
||||
|
||||
out_file.write("const int32_t MAX_ALIASES = %d;\n" % max_aliases)
|
||||
|
||||
# main() ------------------------------------------------------------------- ***
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue