mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 00:43:32 +00:00
ICU-2015 initial checkin
X-SVN-Rev: 10098
This commit is contained in:
parent
584ee76800
commit
9a4ac886fe
2 changed files with 557 additions and 0 deletions
icu4c/source/common
180
icu4c/source/common/propname.cpp
Normal file
180
icu4c/source/common/propname.cpp
Normal file
|
@ -0,0 +1,180 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2002, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
* Created: October 30 2002
|
||||
* Since: ICU 2.2
|
||||
**********************************************************************
|
||||
*/
|
||||
#include "propname.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "umutex.h"
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// PropertyAliases implementation
|
||||
|
||||
const char*
|
||||
PropertyAliases::chooseNameInGroup(Offset offset,
|
||||
UPropertyNameChoice choice) const {
|
||||
int32_t c = choice;
|
||||
if (!offset || c < 0) {
|
||||
return NULL;
|
||||
}
|
||||
const Offset* p = (const Offset*) getPointer(offset);
|
||||
while (c-- > 0) {
|
||||
if (*p++ < 0) return NULL;
|
||||
}
|
||||
Offset a = *p;
|
||||
if (a < 0) a = -a;
|
||||
return (const char*) getPointerNull(a);
|
||||
}
|
||||
|
||||
const ValueMap*
|
||||
PropertyAliases::getValueMap(EnumValue prop) const {
|
||||
NonContiguousEnumToOffset* e2o = (NonContiguousEnumToOffset*) getPointer(enumToValue_offset);
|
||||
Offset a = e2o->getOffset(prop);
|
||||
return (const ValueMap*) (a ? getPointerNull(a) : NULL);
|
||||
}
|
||||
|
||||
inline const char*
|
||||
PropertyAliases::getPropertyName(EnumValue prop,
|
||||
UPropertyNameChoice choice) const {
|
||||
NonContiguousEnumToOffset* e2n = (NonContiguousEnumToOffset*) getPointer(enumToName_offset);
|
||||
// (prop < UCHAR_BINARY_LIMIT) ? enumToName_binaryOffset :
|
||||
// enumToName_enumeratedOffset);
|
||||
return chooseNameInGroup(e2n->getOffset(prop), choice);
|
||||
}
|
||||
|
||||
inline EnumValue
|
||||
PropertyAliases::getPropertyEnum(const char* alias) const {
|
||||
NameToEnum* n2e = (NameToEnum*) getPointer(nameToEnum_offset);
|
||||
return n2e->getEnum(alias, *this);
|
||||
}
|
||||
|
||||
inline const char*
|
||||
PropertyAliases::getPropertyValueName(EnumValue prop,
|
||||
EnumValue value,
|
||||
UPropertyNameChoice choice) const {
|
||||
const ValueMap* vm = getValueMap(prop);
|
||||
if (!vm) return NULL;
|
||||
Offset a;
|
||||
if (vm->enumToName_offset) {
|
||||
a = ((EnumToOffset*) getPointer(vm->enumToName_offset))->
|
||||
getOffset(value);
|
||||
} else {
|
||||
a = ((NonContiguousEnumToOffset*) getPointer(vm->ncEnumToName_offset))->
|
||||
getOffset(value);
|
||||
}
|
||||
return chooseNameInGroup(a, choice);
|
||||
}
|
||||
|
||||
inline EnumValue
|
||||
PropertyAliases::getPropertyValueEnum(EnumValue prop,
|
||||
const char* alias) const {
|
||||
const ValueMap* vm = getValueMap(prop);
|
||||
if (!vm) return UCHAR_INVALID_CODE;
|
||||
NameToEnum* n2e = (NameToEnum*) getPointer(vm->nameToEnum_offset);
|
||||
return n2e->getEnum(alias, *this);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// UDataMemory structures
|
||||
|
||||
static const PropertyAliases* PNAME = NULL;
|
||||
static UDataMemory* UDATA = NULL;
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// UDataMemory loading/unloading
|
||||
|
||||
/**
|
||||
* udata callback to verify the zone data.
|
||||
*/
|
||||
U_CDECL_BEGIN
|
||||
static UBool U_CALLCONV
|
||||
isAcceptable(void* /*context*/,
|
||||
const char* /*type*/, const char* /*name*/,
|
||||
const UDataInfo* info) {
|
||||
return
|
||||
info->size >= sizeof(UDataInfo) &&
|
||||
info->isBigEndian == U_IS_BIG_ENDIAN &&
|
||||
info->charsetFamily == U_CHARSET_FAMILY &&
|
||||
info->dataFormat[0] == PNAME_SIG_0 &&
|
||||
info->dataFormat[1] == PNAME_SIG_1 &&
|
||||
info->dataFormat[2] == PNAME_SIG_2 &&
|
||||
info->dataFormat[3] == PNAME_SIG_3 &&
|
||||
info->formatVersion[0] == PNAME_FORMAT_VERSION;
|
||||
}
|
||||
|
||||
UBool
|
||||
pname_cleanup() {
|
||||
if (UDATA) {
|
||||
udata_close(UDATA);
|
||||
UDATA = NULL;
|
||||
}
|
||||
PNAME = NULL;
|
||||
return TRUE;
|
||||
}
|
||||
U_CDECL_END
|
||||
|
||||
static UBool load() {
|
||||
if (!PNAME) {
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
UDataMemory* data =
|
||||
udata_openChoice(0, PNAME_DATA_TYPE, PNAME_DATA_NAME,
|
||||
isAcceptable, 0, &ec);
|
||||
if (U_SUCCESS(ec)) {
|
||||
umtx_lock(NULL);
|
||||
if (UDATA == NULL) {
|
||||
UDATA = data;
|
||||
PNAME = (const PropertyAliases*) udata_getMemory(UDATA);
|
||||
data = NULL;
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
}
|
||||
if (data) {
|
||||
udata_close(data);
|
||||
}
|
||||
}
|
||||
return PNAME!=NULL;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Public API implementation
|
||||
|
||||
// The C API is just a thin wrapper. Each function obtains a pointer
|
||||
// to the singleton PropertyAliases, and calls the appropriate method
|
||||
// on it. If it cannot obtain a pointer, because valid data is not
|
||||
// available, then it returns NULL or UCHAR_INVALID_CODE.
|
||||
|
||||
U_CAPI const char* U_EXPORT2
|
||||
u_getPropertyName(UProperty property,
|
||||
UPropertyNameChoice nameChoice) {
|
||||
return load() ? PNAME->getPropertyName(property, nameChoice)
|
||||
: NULL;
|
||||
}
|
||||
|
||||
U_CAPI UProperty U_EXPORT2
|
||||
u_getPropertyEnum(const char* alias) {
|
||||
return load() ? (UProperty) PNAME->getPropertyEnum(alias)
|
||||
: UCHAR_INVALID_CODE;
|
||||
}
|
||||
|
||||
U_CAPI const char* U_EXPORT2
|
||||
u_getPropertyValueName(UProperty property,
|
||||
int32_t value,
|
||||
UPropertyNameChoice nameChoice) {
|
||||
return load() ? PNAME->getPropertyValueName(property, value, nameChoice)
|
||||
: NULL;
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_getPropertyValueEnum(UProperty property,
|
||||
const char* alias) {
|
||||
return load() ? PNAME->getPropertyValueEnum(property, alias)
|
||||
: UCHAR_INVALID_CODE;
|
||||
}
|
||||
|
||||
//eof
|
377
icu4c/source/common/propname.h
Normal file
377
icu4c/source/common/propname.h
Normal file
|
@ -0,0 +1,377 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2002, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
* Created: October 30 2002
|
||||
* Since: ICU 2.2
|
||||
**********************************************************************
|
||||
*/
|
||||
#ifndef PROPNAME_H
|
||||
#define PROPNAME_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "uprops.h"
|
||||
|
||||
// This header defines the in-memory layout of the property names data
|
||||
// structure representing the UCD data files PropertyAliases.txt and
|
||||
// PropertyValueAliases.txt. It is used by:
|
||||
// propname.cpp - reads data
|
||||
// genpname - creates data
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// UDataMemory structure and signatures
|
||||
|
||||
#define PNAME_DATA_NAME "pnames"
|
||||
#define PNAME_DATA_TYPE "icu"
|
||||
|
||||
// Fields in UDataInfo:
|
||||
|
||||
// PNAME_SIG[] is encoded as numeric literals for compatibility with the HP compiler
|
||||
static const uint8_t PNAME_SIG_0 = 0x70; // p
|
||||
static const uint8_t PNAME_SIG_1 = 0x6E; // n
|
||||
static const uint8_t PNAME_SIG_2 = 0x61; // a
|
||||
static const uint8_t PNAME_SIG_3 = 0x6D; // m
|
||||
|
||||
static const int8_t PNAME_FORMAT_VERSION = 1; // formatVersion[0]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// PropertyAliases class
|
||||
|
||||
/**
|
||||
* An offset from the start of the pnames data to a contained entity.
|
||||
* This must be a signed value, since negative offsets are used as an
|
||||
* end-of-list marker. Offsets to actual objects are non-zero. A
|
||||
* zero offset indicates an absent entry; this corresponds to aliases
|
||||
* marked "n/a" in the original Unicode data files.
|
||||
*/
|
||||
typedef int16_t Offset; // must be signed
|
||||
|
||||
#define MAX_OFFSET 0x7FFF
|
||||
|
||||
/**
|
||||
* A generic value for a property or property value. Typically an
|
||||
* enum from uchar.h, but sometimes a non-enum value. It must be
|
||||
* large enough to accomodate the largest enum value, which as of this
|
||||
* writing is the largest general category mask. Need not be signed
|
||||
* but may be. Typically it doesn't matter, since the caller will
|
||||
* cast it to the proper type before use. Takes the special value
|
||||
* UCHAR_INVALID_CODE for invalid input.
|
||||
*/
|
||||
typedef int32_t EnumValue;
|
||||
|
||||
struct ValueMap; // fwd decl
|
||||
|
||||
/**
|
||||
* A class encapsulating access to the memory-mapped data representing
|
||||
* property aliases and property value aliases (pnames). The class
|
||||
* MUST have no v-table and declares certain methods inline -- small
|
||||
* methods and methods that are called from only one point.
|
||||
*
|
||||
* The data members in this class correspond to the in-memory layout
|
||||
* of the header of the pnames data.
|
||||
*/
|
||||
class PropertyAliases {
|
||||
|
||||
// -- begin pnames data --
|
||||
// Enum=>name EnumToOffset object for binary and enumerated
|
||||
// properties
|
||||
//DELETED:Offset enumToName_binaryOffset;
|
||||
//DELETED:Offset enumToName_enumeratedOffset;
|
||||
Offset enumToName_offset;
|
||||
|
||||
// Name=>enum data for binary & enumerated properties
|
||||
Offset nameToEnum_offset;
|
||||
|
||||
// Enum=>offset EnumToOffset object mapping enumerated properties
|
||||
// to ValueMap objects
|
||||
Offset enumToValue_offset;
|
||||
// -- end pnames data --
|
||||
|
||||
friend class Builder;
|
||||
|
||||
const ValueMap* getValueMap(EnumValue prop) const;
|
||||
|
||||
const char* chooseNameInGroup(Offset offset,
|
||||
UPropertyNameChoice choice) const;
|
||||
|
||||
public:
|
||||
|
||||
inline const int8_t* getPointer(Offset o) const {
|
||||
return ((const int8_t*) this) + o;
|
||||
}
|
||||
|
||||
inline const int8_t* getPointerNull(Offset o) const {
|
||||
return o ? getPointer(o) : NULL;
|
||||
}
|
||||
|
||||
inline const char* getPropertyName(EnumValue prop,
|
||||
UPropertyNameChoice choice) const;
|
||||
|
||||
inline EnumValue getPropertyEnum(const char* alias) const;
|
||||
|
||||
inline const char* getPropertyValueName(EnumValue prop, EnumValue value,
|
||||
UPropertyNameChoice choice) const;
|
||||
|
||||
inline EnumValue getPropertyValueEnum(EnumValue prop,
|
||||
const char* alias) const;
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// ValueMap
|
||||
|
||||
/**
|
||||
* For any top-level property that has named values (binary and
|
||||
* enumerated properties), there is a ValueMap object. This object
|
||||
* maps from enum values to two other maps. One goes from value enums
|
||||
* to value names. The other goes from value names to value enums.
|
||||
*
|
||||
* The value enum values may be contiguous or disjoint. If they are
|
||||
* contiguous then the enumToName_offset is nonzero, and the
|
||||
* ncEnumToName_offset is zero. Vice versa if the value enums are
|
||||
* disjoint.
|
||||
*
|
||||
* There are n of these objects, where n is the number of binary
|
||||
* properties + the number of enumerated properties.
|
||||
*/
|
||||
struct ValueMap {
|
||||
|
||||
// -- begin pnames data --
|
||||
// Enum=>name EnumToOffset / NonContiguousEnumToOffset objects.
|
||||
// Exactly one of these will be nonzero.
|
||||
Offset enumToName_offset;
|
||||
Offset ncEnumToName_offset;
|
||||
|
||||
Offset nameToEnum_offset; // Name=>enum data
|
||||
// -- end pnames data --
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// EnumToOffset
|
||||
|
||||
/**
|
||||
* A generic map from enum values to Offsets. The enum values must be
|
||||
* contiguous, from enumStart to enumLimit. The Offset values may
|
||||
* point to anything.
|
||||
*/
|
||||
class EnumToOffset {
|
||||
|
||||
// -- begin pnames data --
|
||||
EnumValue enumStart;
|
||||
EnumValue enumLimit;
|
||||
Offset _offsetArray; // [array of enumLimit-enumStart]
|
||||
// -- end pnames data --
|
||||
|
||||
friend class Builder;
|
||||
|
||||
Offset* getOffsetArray() {
|
||||
return &_offsetArray;
|
||||
}
|
||||
|
||||
const Offset* getOffsetArray() const {
|
||||
return &_offsetArray;
|
||||
}
|
||||
|
||||
static int32_t getSize(int32_t n) {
|
||||
return sizeof(EnumToOffset) + sizeof(Offset) * (n - 1);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
Offset getOffset(EnumValue enumProbe) const {
|
||||
if (enumProbe < enumStart ||
|
||||
enumProbe >= enumLimit) {
|
||||
return 0; // not found
|
||||
}
|
||||
const Offset* p = getOffsetArray();
|
||||
return p[enumProbe - enumStart];
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// NonContiguousEnumToOffset
|
||||
|
||||
/**
|
||||
* A generic map from enum values to Offsets. The enum values may be
|
||||
* disjoint. If they are contiguous, an EnumToOffset should be used
|
||||
* instead. The Offset values may point to anything.
|
||||
*/
|
||||
class NonContiguousEnumToOffset {
|
||||
|
||||
// -- begin pnames data --
|
||||
int32_t count;
|
||||
EnumValue _enumArray; // [array of count]
|
||||
// Offset _offsetArray; // [array of count] after enumValue[count-1]
|
||||
// -- end pnames data --
|
||||
|
||||
friend class Builder;
|
||||
|
||||
EnumValue* getEnumArray() {
|
||||
return &_enumArray;
|
||||
}
|
||||
|
||||
const EnumValue* getEnumArray() const {
|
||||
return &_enumArray;
|
||||
}
|
||||
|
||||
Offset* getOffsetArray() {
|
||||
return (Offset*) (getEnumArray() + count);
|
||||
}
|
||||
|
||||
const Offset* getOffsetArray() const {
|
||||
return (Offset*) (getEnumArray() + count);
|
||||
}
|
||||
|
||||
static int32_t getSize(int32_t n) {
|
||||
return sizeof(int32_t) + (sizeof(EnumValue) + sizeof(Offset)) * n;
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
Offset getOffset(EnumValue enumProbe) const {
|
||||
const EnumValue* e = getEnumArray();
|
||||
const Offset* p = getOffsetArray();
|
||||
// linear search; binary later if warranted
|
||||
// (binary is not faster for short lists)
|
||||
for (int32_t i=0; i<count; ++i) {
|
||||
if (e[i] < enumProbe) continue;
|
||||
if (e[i] > enumProbe) break;
|
||||
return p[i];
|
||||
}
|
||||
return 0; // not found
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// NameToEnum
|
||||
|
||||
/**
|
||||
* A map from names to enum values.
|
||||
*/
|
||||
class NameToEnum {
|
||||
|
||||
// -- begin pnames data --
|
||||
int32_t count; // number of entries
|
||||
EnumValue _enumArray; // [array of count] EnumValues
|
||||
// Offset _nameArray; // [array of count] offsets to names
|
||||
// -- end pnames data --
|
||||
|
||||
friend class Builder;
|
||||
|
||||
EnumValue* getEnumArray() {
|
||||
return &_enumArray;
|
||||
}
|
||||
|
||||
const EnumValue* getEnumArray() const {
|
||||
return &_enumArray;
|
||||
}
|
||||
|
||||
Offset* getNameArray() {
|
||||
return (Offset*) (getEnumArray() + count);
|
||||
}
|
||||
|
||||
const Offset* getNameArray() const {
|
||||
return (Offset*) (getEnumArray() + count);
|
||||
}
|
||||
|
||||
static int32_t getSize(int32_t n) {
|
||||
return sizeof(int32_t) + (sizeof(Offset) + sizeof(EnumValue)) * n;
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
EnumValue getEnum(const char* alias, const PropertyAliases& data) const {
|
||||
|
||||
const Offset* n = getNameArray();
|
||||
const EnumValue* e = getEnumArray();
|
||||
|
||||
// linear search; binary later if warranted
|
||||
// (binary is not faster for short lists)
|
||||
for (int32_t i=0; i<count; ++i) {
|
||||
const char* name = (const char*) data.getPointer(n[i]);
|
||||
int32_t c = uprv_comparePropertyNames(alias, name);
|
||||
if (c > 0) continue;
|
||||
if (c < 0) break;
|
||||
return e[i];
|
||||
}
|
||||
|
||||
return UCHAR_INVALID_CODE;
|
||||
}
|
||||
};
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
*
|
||||
* In-memory layout
|
||||
* (>x) this structure refers to structure x
|
||||
*
|
||||
* header:
|
||||
* enumToName_offset (>0)
|
||||
* nameToEnum_offset (>2)
|
||||
* enumToValue_offset (>3)
|
||||
*
|
||||
* 0: # EnumToOffset obj for props => name groups
|
||||
* count
|
||||
* enumArray [x count]
|
||||
* offsetArray [x count] (>98)
|
||||
*
|
||||
* => pad to next 4-byte boundary
|
||||
*
|
||||
* (1: omitted -- no longer used)
|
||||
*
|
||||
* 2: # NameToEnum obj for binary & enumerated props
|
||||
* count
|
||||
* enumArray [x count]
|
||||
* nameArray [x count] (>99)
|
||||
*
|
||||
* => pad to next 4-byte boundary
|
||||
*
|
||||
* 3: # EnumToOffset obj for enumerated props => ValueMaps
|
||||
* count
|
||||
* enumArray [x count]
|
||||
* offsetArray [x count] (>4)
|
||||
*
|
||||
* => pad to next 4-byte boundary
|
||||
*
|
||||
* 4: # ValueMap array [x one for each enumerated prop i]
|
||||
* enumToName_offset (>5a +2*i) one of these two is NULL, one is not
|
||||
* ncEnumToName_offset (>5b +2*i)
|
||||
* nameToEnums_offset (>6 +2*i)
|
||||
*
|
||||
* => pad to next 4-byte boundary
|
||||
*
|
||||
* for each enumerated prop (either 5a or 5b):
|
||||
*
|
||||
* 5a: # EnumToOffset for enumerated prop's values => name groups
|
||||
* enumStart
|
||||
* enumLimit
|
||||
* offsetArray [x enumLimit - enumStart] (>98)
|
||||
*
|
||||
* => pad to next 4-byte boundary
|
||||
*
|
||||
* 5b: # NonContiguousEnumToOffset for enumerated prop's values => name groups
|
||||
* count
|
||||
* enumArray [x count]
|
||||
* offsetArray [x count] (>98)
|
||||
*
|
||||
* => pad to next 4-byte boundary
|
||||
*
|
||||
* 6: # NameToEnum for enumerated prop's values
|
||||
* count
|
||||
* enumArray [x count]
|
||||
* nameArray [x count] (>99)
|
||||
*
|
||||
* => pad to next 4-byte boundary
|
||||
*
|
||||
* 98: # name group pool {NGP}
|
||||
* [array of Offset values] (>99)
|
||||
*
|
||||
* 99: # string pool {SP}
|
||||
* [pool of nul-terminated char* strings]
|
||||
*
|
||||
*/
|
||||
|
||||
#endif
|
||||
|
||||
//eof
|
Loading…
Add table
Reference in a new issue