ICU-8605 document & test ICU4C dependencies, remove cycles, reduce some deps; merged from branches/markus/depstest -r 30155:30193

X-SVN-Rev: 30194
This commit is contained in:
Markus Scherer 2011-06-03 05:23:57 +00:00
parent 8cc8801c03
commit 9f7d74001c
89 changed files with 2732 additions and 1220 deletions

View file

@ -90,12 +90,13 @@ stringtriebuilder.o bytestriebuilder.o \
bytestrie.o bytestrieiterator.o \
ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \
appendable.o ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
utf_impl.o ustring.o ustrcase.o ucasemap.o ucasemap_titlecase_brkiter.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase_brkiter.o \
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o unorm_it.o \
chariter.o schriter.o uchriter.o uiter.o \
patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
uscript.o usc_impl.o unames.o \
utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o triedict.o \
rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \
serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \

View file

@ -21,6 +21,7 @@
#include "uhash.h"
#include "uarrsort.h"
#include "uassert.h"
#include "ustr_imp.h"
U_NAMESPACE_BEGIN
@ -335,7 +336,7 @@ BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar
BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
: LinearMatchNode(len, nextNode), s(bytes) {
hash=hash*37+uhash_hashCharsN(bytes, len);
hash=hash*37+ustr_hashCharsN(bytes, len);
}
UBool

View file

@ -1,6 +1,6 @@
/*
*****************************************************************************
* Copyright (C) 1996-2010, International Business Machines Corporation and *
* Copyright (C) 1996-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*****************************************************************************
*/
@ -288,7 +288,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
if(U_FAILURE(status)) {
return;
}
subpermute.setValueDeleter(uhash_deleteUnicodeString);
subpermute.setValueDeleter(uprv_deleteUObject);
for (i = 0; i < source.length(); i += UTF16_CHAR_LENGTH(cp)) {
cp = source.char32At(i);
@ -345,9 +345,9 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
if (U_FAILURE(status)) {
return 0;
}
result.setValueDeleter(uhash_deleteUnicodeString);
permutations.setValueDeleter(uhash_deleteUnicodeString);
basic.setValueDeleter(uhash_deleteUnicodeString);
result.setValueDeleter(uprv_deleteUObject);
permutations.setValueDeleter(uprv_deleteUObject);
basic.setValueDeleter(uprv_deleteUObject);
UChar USeg[256];
int32_t segLen = segment.extract(USeg, 256, status);
@ -453,7 +453,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UCh
while (iter.next()) {
UChar32 cp2 = iter.getCodepoint();
Hashtable remainder(status);
remainder.setValueDeleter(uhash_deleteUnicodeString);
remainder.setValueDeleter(uprv_deleteUObject);
if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) {
continue;
}

View file

@ -91,6 +91,22 @@ cmemory_inUse(void);
U_CFUNC UBool
cmemory_cleanup(void);
/**
* A function called by <TT>uhash_remove</TT>,
* <TT>uhash_close</TT>, or <TT>uhash_put</TT> to delete
* an existing key or value.
* @param obj A key or value stored in a hashtable
* @see uprv_deleteUObject
*/
typedef void U_CALLCONV UObjectDeleter(void* obj);
/**
* Deleter for UObject instances.
* Works for all subclasses of UObject because it has a virtual destructor.
* Intended to be passed wherever a UObjectDeleter callback is expected
* (hashtable key/value deleters, vector element deleters).
* @param obj The object to delete; presumably must point to a UObject
*            or subclass instance -- confirm against the implementation.
*/
U_CAPI void U_EXPORT2
uprv_deleteUObject(void *obj);
#ifdef XP_CPLUSPLUS
U_NAMESPACE_BEGIN

View file

@ -383,6 +383,7 @@
<ClCompile Include="unifilt.cpp" />
<ClCompile Include="unifunct.cpp" />
<ClCompile Include="uniset.cpp" />
<ClCompile Include="uniset_closure.cpp" />
<ClCompile Include="uniset_props.cpp" />
<ClCompile Include="unisetspan.cpp" />
<ClCompile Include="uprops.cpp" />
@ -414,6 +415,7 @@
<ClCompile Include="stringpiece.cpp" />
<ClCompile Include="stringtriebuilder.cpp" />
<ClCompile Include="ucasemap.cpp" />
<ClCompile Include="ucasemap_titlecase_brkiter.cpp" />
<ClCompile Include="ucharstrie.cpp" />
<ClCompile Include="ucharstriebuilder.cpp" />
<ClCompile Include="ucharstrieiterator.cpp" />
@ -422,11 +424,15 @@
<ClCompile Include="uiter.cpp" />
<ClCompile Include="unistr.cpp" />
<ClCompile Include="unistr_case.cpp" />
<ClCompile Include="unistr_case_locale.cpp" />
<ClCompile Include="unistr_cnv.cpp" />
<ClCompile Include="unistr_props.cpp" />
<ClCompile Include="unistr_titlecase_brkiter.cpp" />
<ClCompile Include="ustr_cnv.c" />
<ClCompile Include="ustr_titlecase_brkiter.cpp" />
<ClCompile Include="ustr_wcs.cpp" />
<ClCompile Include="ustrcase.cpp" />
<ClCompile Include="ustrcase_locale.cpp" />
<ClCompile Include="ustring.cpp" />
<ClCompile Include="ustrtrns.cpp" />
<ClCompile Include="utext.cpp" />

View file

@ -391,6 +391,9 @@
<ClCompile Include="uniset.cpp">
<Filter>properties &amp; sets</Filter>
</ClCompile>
<ClCompile Include="uniset_closure.cpp">
<Filter>properties &amp; sets</Filter>
</ClCompile>
<ClCompile Include="uniset_props.cpp">
<Filter>properties &amp; sets</Filter>
</ClCompile>
@ -466,6 +469,9 @@
<ClCompile Include="ucasemap.cpp">
<Filter>strings</Filter>
</ClCompile>
<ClCompile Include="ucasemap_titlecase_brkiter.cpp">
<Filter>strings</Filter>
</ClCompile>
<ClCompile Include="uchriter.cpp">
<Filter>strings</Filter>
</ClCompile>
@ -481,21 +487,33 @@
<ClCompile Include="unistr_case.cpp">
<Filter>strings</Filter>
</ClCompile>
<ClCompile Include="unistr_case_locale.cpp">
<Filter>strings</Filter>
</ClCompile>
<ClCompile Include="unistr_cnv.cpp">
<Filter>strings</Filter>
</ClCompile>
<ClCompile Include="unistr_props.cpp">
<Filter>strings</Filter>
</ClCompile>
<ClCompile Include="unistr_titlecase_brkiter.cpp">
<Filter>strings</Filter>
</ClCompile>
<ClCompile Include="ustr_cnv.c">
<Filter>strings</Filter>
</ClCompile>
<ClCompile Include="ustr_titlecase_brkiter.cpp">
<Filter>strings</Filter>
</ClCompile>
<ClCompile Include="ustr_wcs.cpp">
<Filter>strings</Filter>
</ClCompile>
<ClCompile Include="ustrcase.cpp">
<Filter>strings</Filter>
</ClCompile>
<ClCompile Include="ustrcase_locale.cpp">
<Filter>strings</Filter>
</ClCompile>
<ClCompile Include="ustring.cpp">
<Filter>strings</Filter>
</ClCompile>

View file

@ -1,6 +1,6 @@
/*
******************************************************************************
* Copyright (C) 1997-2010, International Business Machines
* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* Date Name Description
@ -13,6 +13,7 @@
#include "unicode/unistr.h"
#include "unicode/uobject.h"
#include "cmemory.h"
#include "uhash.h"
U_NAMESPACE_BEGIN
@ -108,7 +109,7 @@ inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
uhash_init(&hashObj, keyHash, keyComp, valueComp, &status);
if (U_SUCCESS(status)) {
hash = &hashObj;
uhash_setKeyDeleter(hash, uhash_deleteUnicodeString);
uhash_setKeyDeleter(hash, uprv_deleteUObject);
}
}

View file

@ -844,7 +844,18 @@ unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
// Some properties APIs ---------------------------------------------------- ***
U_CFUNC UNormalizationCheckResult U_EXPORT2
U_CAPI uint8_t U_EXPORT2
u_getCombiningClass(UChar32 c) {
    // Look up the combining class of c through the NFC Normalizer2Impl.
    // If the NFC implementation cannot be obtained, report class 0.
    UErrorCode status=U_ZERO_ERROR;
    const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(status);
    if(U_FAILURE(status)) {
        return 0;
    }
    return nfcImpl->getCC(nfcImpl->getNorm16(c));
}
U_CFUNC UNormalizationCheckResult
unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
return UNORM_YES;
@ -858,6 +869,17 @@ unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
}
}
U_CFUNC uint16_t
unorm_getFCD16Simple(UChar32 c) {
    // Fetch the 16-bit FCD value for c straight from the FCD trie.
    // If the trie cannot be obtained, return 0.
    UErrorCode status=U_ZERO_ERROR;
    const UTrie2 *fcdTrie=Normalizer2Factory::getFCDTrie(status);
    if(U_FAILURE(status)) {
        return 0;
    }
    return UTRIE2_GET16(fcdTrie, c);
}
U_CAPI const uint16_t * U_EXPORT2
unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) {
const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode);

View file

@ -25,7 +25,6 @@
#include "mutex.h"
#include "normalizer2impl.h"
#include "uassert.h"
#include "uhash.h"
#include "uset_imp.h"
#include "utrie2.h"
#include "uvector.h"
@ -1713,7 +1712,7 @@ const UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *l
CanonIterData::CanonIterData(UErrorCode &errorCode) :
trie(utrie2_open(0, 0, &errorCode)),
canonStartSets(uhash_deleteUObject, NULL, errorCode) {}
canonStartSets(uprv_deleteUObject, NULL, errorCode) {}
CanonIterData::~CanonIterData() {
utrie2_close(trie);

View file

@ -547,9 +547,16 @@ unorm2_swap(const UDataSwapper *ds,
* Get the NF*_QC property for a code point, for u_getIntPropertyValue().
* @internal
*/
U_CFUNC UNormalizationCheckResult U_EXPORT2
U_CFUNC UNormalizationCheckResult
unorm_getQuickCheck(UChar32 c, UNormalizationMode mode);
/**
* Get the 16-bit FCD value (lead & trail CCs) for a code point, for u_getIntPropertyValue().
* @internal
*/
U_CFUNC uint16_t
unorm_getFCD16Simple(UChar32 c);
/**
* Internal API, used by collation code.
* Get access to the internal FCD trie table to be able to perform

View file

@ -12,6 +12,7 @@
#include "propname.h"
#include "unicode/uchar.h"
#include "unicode/udata.h"
#include "unicode/uscript.h"
#include "umutex.h"
#include "cmemory.h"
#include "cstring.h"
@ -312,3 +313,15 @@ u_getPropertyValueEnum(UProperty property,
U_NAMESPACE_USE
return PropNameData::getPropertyValueEnum(property, alias);
}
U_CAPI const char* U_EXPORT2
uscript_getName(UScriptCode scriptCode){
    // The script name is the long value name of the UCHAR_SCRIPT property.
    const char *longName=
        u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, U_LONG_PROPERTY_NAME);
    return longName;
}
U_CAPI const char* U_EXPORT2
uscript_getShortName(UScriptCode scriptCode){
    // The short script name is the short value name of the UCHAR_SCRIPT property.
    const char *shortName=
        u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, U_SHORT_PROPERTY_NAME);
    return shortName;
}

View file

@ -23,7 +23,6 @@
#include "unicode/uchriter.h"
#include "unicode/parsepos.h"
#include "unicode/parseerr.h"
#include "util.h"
#include "cmemory.h"
#include "cstring.h"
@ -122,18 +121,14 @@ RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb)
// and the time to build these few sets should be small compared to a
// full break iterator build.
fRuleSets[kRuleSet_rule_char-128] = UnicodeSet(gRuleSet_rule_char_pattern, *rb->fStatus);
UnicodeSet *whitespaceSet = uprv_openPatternWhiteSpaceSet(rb->fStatus);
if (U_FAILURE(*rb->fStatus)) {
return;
}
fRuleSets[kRuleSet_white_space-128] = *whitespaceSet;
delete whitespaceSet;
// fRuleSets[kRuleSet_white_space-128] = [:Pattern_White_Space:]
fRuleSets[kRuleSet_white_space-128].add(9, 0xd).add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
fRuleSets[kRuleSet_name_char-128] = UnicodeSet(gRuleSet_name_char_pattern, *rb->fStatus);
fRuleSets[kRuleSet_name_start_char-128] = UnicodeSet(gRuleSet_name_start_char_pattern, *rb->fStatus);
fRuleSets[kRuleSet_digit_char-128] = UnicodeSet(gRuleSet_digit_char_pattern, *rb->fStatus);
if (*rb->fStatus == U_ILLEGAL_ARGUMENT_ERROR) {
// This case happens if ICU's data is missing. UnicodeSet tries to look up property
// names from the init string, can't find them, and claims an illegal arguement.
// names from the init string, can't find them, and claims an illegal argument.
// Change the error so that the actual problem will be clearer to users.
*rb->fStatus = U_BRK_INIT_ERROR;
}
@ -1146,12 +1141,11 @@ void RBBIRuleScanner::scanSet() {
pos.setIndex(fScanIndex);
startPos = fScanIndex;
UErrorCode localStatus = U_ZERO_ERROR;
uset = new UnicodeSet(fRB->fRules, pos, USET_IGNORE_SPACE,
fSymbolTable,
localStatus);
uset = new UnicodeSet();
if (uset == NULL) {
localStatus = U_MEMORY_ALLOCATION_ERROR;
}
uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus);
if (U_FAILURE(localStatus)) {
// TODO: Get more accurate position of the error from UnicodeSet's return info.
// UnicodeSet appears to not be reporting correctly at this time.

View file

@ -278,7 +278,7 @@ public:
DNCache(const Locale& _locale)
: cache(), locale(_locale)
{
// cache.setKeyDeleter(uhash_deleteUnicodeString);
// cache.setKeyDeleter(uprv_deleteUObject);
}
};
@ -519,7 +519,7 @@ ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUSer
// fallback to the one that succeeded, we want to hit the
// cache the first time next goaround.
if (cacheDescriptorList._obj == NULL) {
cacheDescriptorList._obj = new UVector(uhash_deleteUnicodeString, NULL, 5, status);
cacheDescriptorList._obj = new UVector(uprv_deleteUObject, NULL, 5, status);
if (U_FAILURE(status)) {
return NULL;
}

View file

@ -15,7 +15,6 @@
#include "cmemory.h"
#include "servloc.h"
#include "ustrfmt.h"
#include "uhash.h"
#include "charstr.h"
#include "ucln_cmn.h"
#include "uassert.h"
@ -163,7 +162,7 @@ private:
ServiceEnumeration(const ICULocaleService* service, UErrorCode &status)
: _service(service)
, _timestamp(service->getTimestamp())
, _ids(uhash_deleteUnicodeString, NULL, status)
, _ids(uprv_deleteUObject, NULL, status)
, _pos(0)
{
_service->getVisibleIDs(_ids, status);
@ -172,7 +171,7 @@ private:
ServiceEnumeration(const ServiceEnumeration &other, UErrorCode &status)
: _service(other._service)
, _timestamp(other._timestamp)
, _ids(uhash_deleteUnicodeString, NULL, status)
, _ids(uprv_deleteUObject, NULL, status)
, _pos(0)
{
if(U_SUCCESS(status)) {

View file

@ -51,7 +51,7 @@ StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode
errorCode=U_MEMORY_ALLOCATION_ERROR;
}
if(U_SUCCESS(errorCode)) {
uhash_setKeyDeleter(nodes, uhash_deleteUObject);
uhash_setKeyDeleter(nodes, uprv_deleteUObject);
}
}

View file

@ -17,11 +17,12 @@
*/
#include "unicode/utypes.h"
#include "unicode/brkiter.h"
#include "unicode/ubrk.h"
#include "unicode/uloc.h"
#include "unicode/ustring.h"
#include "unicode/ucasemap.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/ubrk.h"
#include "unicode/utext.h"
#endif
#include "cmemory.h"
@ -29,6 +30,8 @@
#include "ucase.h"
#include "ustr_imp.h"
U_NAMESPACE_USE
/* UCaseMap service object -------------------------------------------------- */
U_CAPI UCaseMap * U_EXPORT2
@ -60,7 +63,8 @@ U_CAPI void U_EXPORT2
ucasemap_close(UCaseMap *csm) {
if(csm!=NULL) {
#if !UCONFIG_NO_BREAK_ITERATION
ubrk_close(csm->iter);
// Do not call ubrk_close() so that we do not depend on all of the BreakIterator code.
delete reinterpret_cast<BreakIterator *>(csm->iter);
#endif
uprv_free(csm);
}
@ -106,21 +110,6 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode * /*pErrorCode*/
csm->options=options;
}
#if !UCONFIG_NO_BREAK_ITERATION
U_CAPI const UBreakIterator * U_EXPORT2
ucasemap_getBreakIterator(const UCaseMap *csm) {
return csm->iter;
}
U_CAPI void U_EXPORT2
ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode * /*pErrorCode*/) {
ubrk_close(csm->iter);
csm->iter=iterToAdopt;
}
#endif
/* UTF-8 string case mappings ----------------------------------------------- */
/* TODO(markus): Move to a new, separate utf8case.c file. */
@ -262,37 +251,29 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
#if !UCONFIG_NO_BREAK_ITERATION
/*
* Internal titlecasing function.
*/
static int32_t
_toTitle(UCaseMap *csm,
U_CFUNC int32_t U_CALLCONV
ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, UCaseContext *csc,
int32_t srcLength,
const uint8_t *src, int32_t srcLength,
UErrorCode *pErrorCode) {
UText utext=UTEXT_INITIALIZER;
const UChar *s;
UChar32 c;
int32_t prev, titleStart, titleLimit, idx, destIndex, length;
UBool isFirstIndex;
utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return 0;
}
if(csm->iter==NULL) {
csm->iter=ubrk_open(UBRK_WORD, csm->locale,
NULL, 0,
pErrorCode);
}
ubrk_setUText(csm->iter, &utext, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
utext_close(&utext);
return 0;
}
// Use the C++ abstract base class to minimize dependencies.
// TODO: Change UCaseMap.iter to store a BreakIterator directly.
BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter);
/* set up local variables */
int32_t locCache=csm->locCache;
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
destIndex=0;
prev=0;
isFirstIndex=TRUE;
@ -302,9 +283,9 @@ _toTitle(UCaseMap *csm,
/* find next index where to titlecase */
if(isFirstIndex) {
isFirstIndex=FALSE;
idx=ubrk_first(csm->iter);
idx=bi->first();
} else {
idx=ubrk_next(csm->iter);
idx=bi->next();
}
if(idx==UBRK_DONE || idx>srcLength) {
idx=srcLength;
@ -354,15 +335,14 @@ _toTitle(UCaseMap *csm,
if(titleStart<titleLimit) {
/* titlecase c which is from [titleStart..titleLimit[ */
csc->cpStart=titleStart;
csc->cpLimit=titleLimit;
c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &csm->locCache);
csc.cpStart=titleStart;
csc.cpLimit=titleLimit;
c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache);
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
/* Special case Dutch IJ titlecasing */
if ( titleStart+1 < idx &&
ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LOC_DUTCH &&
ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_DUTCH &&
( src[titleStart] == 0x0049 || src[titleStart] == 0x0069 ) &&
( src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A )) {
c=0x004A;
@ -377,7 +357,7 @@ _toTitle(UCaseMap *csm,
_caseMap(
csm, ucase_toFullLower,
dest+destIndex, destCapacity-destIndex,
src, csc,
src, &csc,
titleLimit, idx,
pErrorCode);
} else {
@ -398,12 +378,41 @@ _toTitle(UCaseMap *csm,
if(destIndex>destCapacity) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
utext_close(&utext);
return destIndex;
}
#endif
static int32_t U_CALLCONV
ucasemap_internalUTF8ToLower(const UCaseMap *csm,
                             uint8_t *dest, int32_t destCapacity,
                             const uint8_t *src, int32_t srcLength,
                             UErrorCode *pErrorCode) {
    // Lowercase the whole UTF-8 source [0, srcLength) via the shared _caseMap() loop.
    // The case context is set up to cover the entire source string.
    UCaseContext context=UCASECONTEXT_INITIALIZER;
    context.limit=srcLength;
    context.p=const_cast<uint8_t *>(src);
    const int32_t srcStart=0;
    return _caseMap(csm, ucase_toFullLower,
                    dest, destCapacity,
                    src, &context,
                    srcStart, srcLength,
                    pErrorCode);
}
static int32_t U_CALLCONV
ucasemap_internalUTF8ToUpper(const UCaseMap *csm,
                             uint8_t *dest, int32_t destCapacity,
                             const uint8_t *src, int32_t srcLength,
                             UErrorCode *pErrorCode) {
    // Uppercase the whole UTF-8 source [0, srcLength) via the shared _caseMap() loop.
    // The case context is set up to cover the entire source string.
    UCaseContext context=UCASECONTEXT_INITIALIZER;
    context.limit=srcLength;
    context.p=const_cast<uint8_t *>(src);
    const int32_t srcStart=0;
    return _caseMap(csm, ucase_toFullUpper,
                    dest, destCapacity,
                    src, &context,
                    srcStart, srcLength,
                    pErrorCode);
}
static int32_t
utf8_foldCase(const UCaseProps *csp,
uint8_t *dest, int32_t destCapacity,
@ -442,19 +451,20 @@ utf8_foldCase(const UCaseProps *csp,
return destIndex;
}
/*
* Implement argument checking and buffer handling
* for string case mapping as a common function.
*/
static int32_t U_CALLCONV
ucasemap_internalUTF8Fold(const UCaseMap *csm,
                          uint8_t *dest, int32_t destCapacity,
                          const uint8_t *src, int32_t srcLength,
                          UErrorCode *pErrorCode) {
    // Adapter with the string-case-mapper signature: unpack the case properties
    // and options from the UCaseMap and delegate to utf8_foldCase().
    const UCaseProps *caseProps=csm->csp;
    return utf8_foldCase(caseProps,
                         dest, destCapacity,
                         src, srcLength,
                         csm->options, pErrorCode);
}
/* common internal function for public API functions */
static int32_t
caseMap(const UCaseMap *csm,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
int32_t toWhichCase,
UErrorCode *pErrorCode) {
U_CFUNC int32_t
ucasemap_mapUTF8(const UCaseMap *csm,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
UTF8CaseMapper *stringCaseMapper,
UErrorCode *pErrorCode) {
int32_t destLength;
/* check argument values */
@ -484,42 +494,7 @@ caseMap(const UCaseMap *csm,
return 0;
}
destLength=0;
if(toWhichCase==FOLD_CASE) {
destLength=utf8_foldCase(csm->csp, dest, destCapacity, src, srcLength,
csm->options, pErrorCode);
} else {
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
if(toWhichCase==TO_LOWER) {
destLength=_caseMap(csm, ucase_toFullLower,
dest, destCapacity,
src, &csc,
0, srcLength,
pErrorCode);
} else if(toWhichCase==TO_UPPER) {
destLength=_caseMap(csm, ucase_toFullUpper,
dest, destCapacity,
src, &csc,
0, srcLength,
pErrorCode);
} else /* if(toWhichCase==TO_TITLE) */ {
#if UCONFIG_NO_BREAK_ITERATION
*pErrorCode=U_UNSUPPORTED_ERROR;
#else
/* UCaseMap is actually non-const in toTitle() APIs. */
UCaseMap *tmp = (UCaseMap *)csm;
destLength=_toTitle(tmp, dest, destCapacity,
src, &csc, srcLength,
pErrorCode);
#endif
}
}
destLength=stringCaseMapper(csm, dest, destCapacity, src, srcLength, pErrorCode);
return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode);
}
@ -530,10 +505,10 @@ ucasemap_utf8ToLower(const UCaseMap *csm,
char *dest, int32_t destCapacity,
const char *src, int32_t srcLength,
UErrorCode *pErrorCode) {
return caseMap(csm,
return ucasemap_mapUTF8(csm,
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
TO_LOWER, pErrorCode);
ucasemap_internalUTF8ToLower, pErrorCode);
}
U_CAPI int32_t U_EXPORT2
@ -541,34 +516,19 @@ ucasemap_utf8ToUpper(const UCaseMap *csm,
char *dest, int32_t destCapacity,
const char *src, int32_t srcLength,
UErrorCode *pErrorCode) {
return caseMap(csm,
return ucasemap_mapUTF8(csm,
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
TO_UPPER, pErrorCode);
ucasemap_internalUTF8ToUpper, pErrorCode);
}
#if !UCONFIG_NO_BREAK_ITERATION
U_CAPI int32_t U_EXPORT2
ucasemap_utf8ToTitle(UCaseMap *csm,
char *dest, int32_t destCapacity,
const char *src, int32_t srcLength,
UErrorCode *pErrorCode) {
return caseMap(csm,
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
TO_TITLE, pErrorCode);
}
#endif
U_CAPI int32_t U_EXPORT2
ucasemap_utf8FoldCase(const UCaseMap *csm,
char *dest, int32_t destCapacity,
const char *src, int32_t srcLength,
UErrorCode *pErrorCode) {
return caseMap(csm,
return ucasemap_mapUTF8(csm,
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
FOLD_CASE, pErrorCode);
ucasemap_internalUTF8Fold, pErrorCode);
}

View file

@ -0,0 +1,67 @@
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: ucasemap_titlecase_brkiter.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2011jun02
* created by: Markus W. Scherer
*
* Titlecasing functions that are based on BreakIterator
* were moved here to break dependency cycles among parts of the common library.
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/brkiter.h"
#include "unicode/ubrk.h"
#include "unicode/ucasemap.h"
#include "cmemory.h"
#include "ucase.h"
#include "ustr_imp.h"
U_NAMESPACE_USE
U_CAPI const UBreakIterator * U_EXPORT2
ucasemap_getBreakIterator(const UCaseMap *csm) {
    // Hand back the iterator currently stored in the case map (may be NULL
    // if none has been set or lazily created yet).
    const UBreakIterator *currentIter=csm->iter;
    return currentIter;
}
U_CAPI void U_EXPORT2
ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode * /*pErrorCode*/) {
    // Replace the break iterator owned by csm with iterToAdopt (ownership transfers to csm).
    // Re-adopting the iterator that csm already owns must be a no-op:
    // deleting it first would leave csm->iter pointing at freed memory.
    if(csm->iter==iterToAdopt) {
        return;
    }
    // Do not call ubrk_close() so that we do not depend on all of the BreakIterator code.
    delete reinterpret_cast<BreakIterator *>(csm->iter);
    csm->iter=iterToAdopt;
}
U_CAPI int32_t U_EXPORT2
ucasemap_utf8ToTitle(UCaseMap *csm,
                     char *dest, int32_t destCapacity,
                     const char *src, int32_t srcLength,
                     UErrorCode *pErrorCode) {
    // Wrap the UTF-8 source in a stack UText so the break iterator can walk it.
    UText srcText=UTEXT_INITIALIZER;
    utext_openUTF8(&srcText, src, srcLength, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    // Lazily create a word break iterator for the case map's locale
    // if the caller has not supplied one.
    if(NULL==csm->iter) {
        csm->iter=ubrk_open(UBRK_WORD, csm->locale, NULL, 0, pErrorCode);
    }
    ubrk_setUText(csm->iter, &srcText, pErrorCode);

    // Run the titlecasing mapper through the common UTF-8 mapping driver.
    uint8_t *destBytes=reinterpret_cast<uint8_t *>(dest);
    const uint8_t *srcBytes=reinterpret_cast<const uint8_t *>(src);
    const int32_t titleLength=
        ucasemap_mapUTF8(csm,
                         destBytes, destCapacity,
                         srcBytes, srcLength,
                         ucasemap_internalUTF8ToTitle, pErrorCode);

    // The UText is closed on every path that reaches the mapping call.
    utext_close(&srcText);
    return titleLength;
}
#endif // !UCONFIG_NO_BREAK_ITERATION

View file

@ -21,6 +21,7 @@
#include "uarrsort.h"
#include "uassert.h"
#include "uhash.h"
#include "ustr_imp.h"
U_NAMESPACE_BEGIN
@ -283,7 +284,7 @@ UCharsTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UCha
UCharsTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode)
: LinearMatchNode(len, nextNode), s(units) {
hash=hash*37+uhash_hashUCharsN(units, len);
hash=hash*37+ustr_hashUCharsN(units, len);
}
UBool

View file

@ -1,6 +1,6 @@
/*
******************************************************************************
* Copyright (C) 1998-2010, International Business Machines Corporation and
* Copyright (C) 1998-2011, International Business Machines Corporation and
* others. All Rights Reserved.
******************************************************************************
*/
@ -9,7 +9,7 @@
#include "unicode/uchriter.h"
#include "unicode/ustring.h"
#include "uhash.h"
#include "ustr_imp.h"
U_NAMESPACE_BEGIN
@ -83,7 +83,7 @@ UCharCharacterIterator::operator==(const ForwardCharacterIterator& that) const {
int32_t
UCharCharacterIterator::hashCode() const {
return uhash_hashUCharsN(text, textLength) ^ pos ^ begin ^ end;
return ustr_hashUCharsN(text, textLength) ^ pos ^ begin ^ end;
}
CharacterIterator*

View file

@ -1,11 +1,11 @@
/*
******************************************************************************
* *
* Copyright (C) 2001-2010, International Business Machines *
* Corporation and others. All Rights Reserved. *
* *
*
* Copyright (C) 2001-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: ucln_cmn.h
* file name: ucln.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
@ -18,6 +18,7 @@
#define __UCLN_H__
#include "unicode/utypes.h"
#include "umutex.h"
/** These are the functions used to register a library's memory cleanup
* functions. Each library should define a single library register function
@ -81,9 +82,16 @@ U_CAPI void U_EXPORT2 ucln_registerCleanup(ECleanupLibraryType type,
/**
* Request cleanup for one specific library.
* Not thread safe.
* Calling this with UCLN_COMMON just calls u_cleanup();
* @param type which library to cleanup
*/
U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType type);
/* ucln_cmn.c variables shared with uinit.c */
U_CDECL_BEGIN
extern UBool gICUInitialized;
extern UMTX gICUInitMutex;
U_CDECL_END
#endif

View file

@ -1,6 +1,6 @@
/*
******************************************************************************
* Copyright (C) 2001-2010, International Business Machines
* Copyright (C) 2001-2011, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* file name: ucln_cmn.c
@ -15,7 +15,6 @@
#include "unicode/utypes.h"
#include "unicode/uclean.h"
#include "utracimp.h"
#include "ustr_imp.h"
#include "ucln_cmn.h"
#include "umutex.h"
#include "ucln.h"
@ -23,24 +22,43 @@
#include "uassert.h"
/** Auto-client for UCLN_COMMON **/
#define UCLN_TYPE UCLN_COMMON
#define UCLN_TYPE_IS_COMMON
#include "ucln_imp.h"
U_CDECL_BEGIN
UBool gICUInitialized = FALSE;
UMTX gICUInitMutex = NULL;
U_CDECL_END
static cleanupFunc *gCommonCleanupFunctions[UCLN_COMMON_COUNT];
static cleanupFunc *gLibCleanupFunctions[UCLN_COMMON];
/************************************************
The cleanup order is important in this function.
Please be sure that you have read ucln.h
************************************************/
U_CAPI void U_EXPORT2
u_cleanup(void)
{
UTRACE_ENTRY_OC(UTRACE_U_CLEANUP);
umtx_lock(NULL); /* Force a memory barrier, so that we are sure to see */
umtx_unlock(NULL); /* all state left around by any other threads. */
/* Enables debugging information about when a library is cleaned up. */
#ifndef UCLN_DEBUG_CLEANUP
#define UCLN_DEBUG_CLEANUP 0
#endif
ucln_lib_cleanup();
umtx_destroy(&gICUInitMutex);
umtx_cleanup();
cmemory_cleanup(); /* undo any heap functions set by u_setMemoryFunctions(). */
gICUInitialized = FALSE;
UTRACE_EXIT(); /* Must be before utrace_cleanup(), which turns off tracing. */
/*#if U_ENABLE_TRACING*/
utrace_cleanup();
/*#endif*/
}
#if defined(UCLN_DEBUG_CLEANUP)
#include <stdio.h>
#endif
static void ucln_cleanup_internal(ECleanupLibraryType libType)
U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType libType)
{
if (gLibCleanupFunctions[libType])
{
@ -49,22 +67,6 @@ static void ucln_cleanup_internal(ECleanupLibraryType libType)
}
}
U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType libType)
{
if(libType==UCLN_COMMON) {
#if UCLN_DEBUG_CLEANUP
fprintf(stderr, "Cleaning up: UCLN_COMMON with u_cleanup, type %d\n", (int)libType);
#endif
u_cleanup();
} else {
#if UCLN_DEBUG_CLEANUP
fprintf(stderr, "Cleaning up: using ucln_cleanup_internal, type %d\n", (int)libType);
#endif
ucln_cleanup_internal(libType);
}
}
U_CFUNC void
ucln_common_registerCleanup(ECleanupCommonType type,
cleanupFunc *func)
@ -95,7 +97,7 @@ U_CFUNC UBool ucln_lib_cleanup(void) {
ECleanupCommonType commonFunc = UCLN_COMMON_START;
for (libType++; libType<UCLN_COMMON; libType++) {
ucln_cleanup_internal(libType);
ucln_cleanupOne(libType);
}
for (commonFunc++; commonFunc<UCLN_COMMON_COUNT; commonFunc++) {

View file

@ -1,9 +1,9 @@
/*
******************************************************************************
* *
* Copyright (C) 2009, International Business Machines *
* Corporation and others. All Rights Reserved. *
* *
*
* Copyright (C) 2009-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: ucln_imp.h
* encoding: US-ASCII
@ -56,6 +56,12 @@
*/
/*static void ucln_unRegisterAutomaticCleanup();*/
/*
 * UCLN_CLEAN_ME_UP is the cleanup call issued by the automatic-cleanup hooks
 * in this header (atexit handler, UCLN_FINI, GCC destructor, DllMain).
 * A client that defines UCLN_TYPE_IS_COMMON gets the full u_cleanup();
 * every other client cleans up only its own library via ucln_cleanupOne(UCLN_TYPE).
 */
#ifdef UCLN_TYPE_IS_COMMON
# define UCLN_CLEAN_ME_UP u_cleanup()
#else
# define UCLN_CLEAN_ME_UP ucln_cleanupOne(UCLN_TYPE)
#endif
/* ------------ automatic cleanup: registration. Choose ONE ------- */
#if defined(UCLN_AUTO_LOCAL)
/* To use:
@ -75,7 +81,7 @@ static UBool gAutoCleanRegistered = FALSE;
static void ucln_atexit_handler()
{
ucln_cleanupOne(UCLN_TYPE);
UCLN_CLEAN_ME_UP;
}
static void ucln_registerAutomaticCleanup()
@ -101,7 +107,7 @@ U_CAPI void U_EXPORT2 UCLN_FINI (void);
U_CAPI void U_EXPORT2 UCLN_FINI ()
{
/* This function must be defined, if UCLN_FINI is defined, else link error. */
ucln_cleanupOne(UCLN_TYPE);
UCLN_CLEAN_ME_UP;
}
#elif defined(__GNUC__)
/* GCC - use __attribute((destructor)) */
@ -109,7 +115,7 @@ static void ucln_destructor() __attribute__((destructor)) ;
static void ucln_destructor()
{
ucln_cleanupOne(UCLN_TYPE);
UCLN_CLEAN_ME_UP;
}
/* Windows: DllMain */
@ -145,7 +151,7 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)
case DLL_PROCESS_DETACH:
/* Here is the one we actually care about. */
ucln_cleanupOne(UCLN_TYPE);
UCLN_CLEAN_ME_UP;
break;

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2000-2009, International Business Machines
* Copyright (C) 2000-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnvisci.c
@ -24,7 +24,6 @@
#include "unicode/ucnv.h"
#include "ucnv_cnv.h"
#include "unicode/ucnv_cb.h"
#include "unicode/uset.h"
#include "cstring.h"
#define UCNV_OPTIONS_VERSION_MASK 0xf
@ -63,9 +62,6 @@
#define PNJ_HA 0x0A39
#define PNJ_RRA 0x0A5C
static USet* PNJ_BINDI_TIPPI_SET= NULL;
static USet* PNJ_CONSONANT_SET= NULL;
typedef enum {
DEVANAGARI =0,
BENGALI,
@ -151,24 +147,40 @@ static const LookupDataStruct lookupInitialData[]={
{ MALAYALAM, MLM_MASK, MLM }
};
static void initializeSets() {
/* TODO: Replace the following two lines with PNJ_CONSONANT_SET = uset_openEmpty(); */
PNJ_CONSONANT_SET = uset_open(0,0);
uset_clear(PNJ_CONSONANT_SET);
/*
 * For special handling of certain Gurmukhi characters.
 *
 * Lookup table with one entry per code point in U+0A00..U+0A4F (80 entries),
 * indexed by (code point - 0x0A00). Each entry is a small bit set:
 * Bit 0 (value 1): PNJ consonant
 * Bit 1 (value 2): PNJ Bindi Tippi
 * Entries are 0 (neither), 2 (Bindi Tippi only) or 3 (both); no entry in
 * this table has only bit 0 set.
 */
static const uint8_t pnjMap[80] = {
/* 0A00..0A0F */
0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0A10..0A1F */
0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
/* 0A20..0A2F */
3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3,
/* 0A30..0A3F */
3, 0, 0, 0, 0, 3, 3, 0, 3, 3, 0, 0, 0, 0, 0, 2,
/* 0A40..0A4F */
0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
uset_addRange(PNJ_CONSONANT_SET, 0x0A15, 0x0A28);
uset_addRange(PNJ_CONSONANT_SET, 0x0A2A, 0x0A30);
uset_addRange(PNJ_CONSONANT_SET, 0x0A35, 0x0A36);
uset_addRange(PNJ_CONSONANT_SET, 0x0A38, 0x0A39);
PNJ_BINDI_TIPPI_SET = uset_clone(PNJ_CONSONANT_SET);
uset_add(PNJ_BINDI_TIPPI_SET, 0x0A05);
uset_add(PNJ_BINDI_TIPPI_SET, 0x0A07);
uset_add(PNJ_BINDI_TIPPI_SET, 0x0A3F);
uset_addRange(PNJ_BINDI_TIPPI_SET, 0x0A41, 0x0A42);
uset_compact(PNJ_CONSONANT_SET);
uset_compact(PNJ_BINDI_TIPPI_SET);
/*
 * Returns TRUE if c lies in U+0A00..U+0A4F and its pnjMap entry has
 * bit 0 (PNJ consonant) set; returns FALSE for everything else.
 */
static UBool
isPNJConsonant(UChar32 c) {
    if (0xa00 <= c && c < 0xa50) {
        /* inside the table: test the consonant bit */
        return (UBool)(pnjMap[c - 0xa00] & 1);
    }
    return FALSE;
}
/*
 * Returns TRUE if c lies in U+0A00..U+0A4F and its pnjMap entry has
 * bit 1 (PNJ Bindi Tippi) set; returns FALSE for everything else.
 */
static UBool
isPNJBindiTippi(UChar32 c) {
    if (0xa00 <= c && c < 0xa50) {
        /* inside the table: shift the Bindi Tippi bit down to bit 0 */
        return (UBool)(pnjMap[c - 0xa00] >> 1);
    }
    return FALSE;
}
static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) {
@ -176,9 +188,6 @@ static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *e
return;
}
/* Ensure that the sets used in special handling of certain Gurmukhi characters are initialized. */
initializeSets();
cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));
if (cnv->extraInfo != NULL) {
@ -225,14 +234,6 @@ static void _ISCIIClose(UConverter *cnv) {
}
cnv->extraInfo=NULL;
}
if (PNJ_CONSONANT_SET != NULL) {
uset_close(PNJ_CONSONANT_SET);
PNJ_CONSONANT_SET = NULL;
}
if (PNJ_BINDI_TIPPI_SET != NULL) {
uset_close(PNJ_BINDI_TIPPI_SET);
PNJ_BINDI_TIPPI_SET = NULL;
}
}
static const char* _ISCIIgetName(const UConverter* cnv) {
@ -1031,7 +1032,7 @@ static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
converterData->contextCharFromUnicode = 0x00;
break;
}
if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && uset_contains(PNJ_CONSONANT_SET, (sourceChar + PNJ_DELTA))) {
if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) {
/* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
/* reset context char */
converterData->contextCharFromUnicode = 0x0000;
@ -1425,7 +1426,7 @@ static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *ar
if (*toUnicodeStatus != missingCharMarker) {
/* Check to make sure that consonant clusters are handled correctly for Gurmukhi script. */
if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && uset_contains(PNJ_CONSONANT_SET, data->prevToUnicodeStatus) &&
if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) &&
(*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) {
/* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */
offset = (int)(source-args->source - 3);
@ -1444,10 +1445,10 @@ static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *ar
/* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script.
* If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
*/
if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && uset_contains(PNJ_BINDI_TIPPI_SET, (*toUnicodeStatus + PNJ_DELTA))) {
if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) {
targetUniChar = PNJ_TIPPI - PNJ_DELTA;
WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);
} else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && uset_contains(PNJ_CONSONANT_SET, (*toUnicodeStatus + PNJ_DELTA))) {
} else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) {
/* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;
} else {

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2008-2009, International Business Machines
* Copyright (C) 2008-2011, International Business Machines
* Corporation, Google and others. All Rights Reserved.
*
*******************************************************************************
@ -26,6 +26,8 @@
#include "unicode/ucnvsel.h"
#if !UCONFIG_NO_CONVERSION
#include <string.h>
#include "unicode/uchar.h"
@ -809,3 +811,5 @@ ucnvsel_selectForUTF8(const UConverterSelector* sel,
}
return selectForMask(sel, mask, status);
}
#endif // !UCONFIG_NO_CONVERSION

View file

@ -1,6 +1,6 @@
/*
******************************************************************************
* Copyright (C) 1997-2010, International Business Machines
* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* Date Name Description
@ -15,6 +15,7 @@
#include "cstring.h"
#include "cmemory.h"
#include "uassert.h"
#include "ustr_imp.h"
/* This hashtable is implemented as a double hash. All elements are
* stored in a single array with no secondary storage for collision
@ -832,58 +833,26 @@ uhash_tokp(void* p) {
* PUBLIC Key Hash Functions
********************************************************************/
/*
Compute the hash by iterating sparsely over about 32 (up to 63)
characters spaced evenly through the string. For each character,
multiply the previous hash value by a prime number and add the new
character in, like a linear congruential random number generator,
producing a pseudorandom deterministic value well distributed over
the output range. [LIU]

Macro parameters:
TYPE   - element type of the string (the pointer is cast to const TYPE *)
STR    - pointer to the first element; a NULL pointer yields a hash of 0
STRLEN - expression for the number of elements; it is evaluated only when
         the pointer is not NULL and may refer to the local variable p
DEREF  - expression for the value of the current element; it may refer
         to the local variable p

The macro declares locals (hash, p, len, inc, limit) and ends with
"return hash", so it must be used as the complete body of a function
that returns int32_t.
*/
#define STRING_HASH(TYPE, STR, STRLEN, DEREF) \
int32_t hash = 0; \
const TYPE *p = (const TYPE*) STR; \
if (p != NULL) { \
int32_t len = (int32_t)(STRLEN); \
int32_t inc = ((len - 32) / 32) + 1; \
const TYPE *limit = p + len; \
while (p<limit) { \
hash = (hash * 37) + DEREF; \
p += inc; \
} \
} \
return hash
U_CAPI int32_t U_EXPORT2
uhash_hashUChars(const UHashTok key) {
STRING_HASH(UChar, key.pointer, u_strlen(p), *p);
}
/*
 * Used by UnicodeString to compute its hashcode - Not public API.
 * Hashes the first `length` UChars of str via STRING_HASH;
 * a NULL str yields 0.
 */
U_CAPI int32_t U_EXPORT2
uhash_hashUCharsN(const UChar *str, int32_t length) {
STRING_HASH(UChar, str, length, *p);
}
U_CAPI int32_t U_EXPORT2
uhash_hashCharsN(const char *str, int32_t length) {
STRING_HASH(char, str, length, *p);
const UChar *s = (const UChar *)key.pointer;
return s == NULL ? 0 : ustr_hashUCharsN(s, u_strlen(s));
}
U_CAPI int32_t U_EXPORT2
uhash_hashChars(const UHashTok key) {
STRING_HASH(uint8_t, key.pointer, uprv_strlen((char*)p), *p);
const char *s = (const char *)key.pointer;
return s == NULL ? 0 : ustr_hashCharsN(s, uprv_strlen(s));
}
U_CAPI int32_t U_EXPORT2
uhash_hashIChars(const UHashTok key) {
STRING_HASH(uint8_t, key.pointer, uprv_strlen((char*)p), uprv_tolower(*p));
const char *s = (const char *)key.pointer;
return s == NULL ? 0 : ustr_hashICharsN(s, uprv_strlen(s));
}
U_CAPI UBool U_EXPORT2
uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
int32_t count1, count2, pos, i;
if(hash1==hash2){
@ -1002,13 +971,3 @@ U_CAPI UBool U_EXPORT2
uhash_compareLong(const UHashTok key1, const UHashTok key2) {
return (UBool)(key1.integer == key2.integer);
}
/********************************************************************
* PUBLIC Deleter Functions
********************************************************************/
/*
 * Deleter that releases obj by passing it to uprv_free().
 */
U_CAPI void U_EXPORT2
uhash_freeBlock(void *obj) {
uprv_free(obj);
}

View file

@ -1,6 +1,6 @@
/*
******************************************************************************
* Copyright (C) 1997-2010, International Business Machines
* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* Date Name Description
@ -14,6 +14,7 @@
#define UHASH_H
#include "unicode/utypes.h"
#include "cmemory.h"
/**
* UHashtable stores key-value pairs and does moderately fast lookup
@ -125,14 +126,8 @@ typedef UBool U_CALLCONV UKeyComparator(const UHashTok key1,
*/
typedef UBool U_CALLCONV UValueComparator(const UHashTok val1,
const UHashTok val2);
/**
* A function called by <TT>uhash_remove</TT>,
* <TT>uhash_close</TT>, or <TT>uhash_put</TT> to delete
* an existing key or value.
* @param obj A key or value stored in a hashtable
* @see uhash_deleteUObject
*/
typedef void U_CALLCONV UObjectDeleter(void* obj);
/* see cmemory.h for UObjectDeleter and uprv_deleteUObject() */
/**
* This specifies whether or not, and how, the hashtable resizes itself.
@ -579,13 +574,6 @@ uhash_hashUChars(const UHashTok key);
U_CAPI int32_t U_EXPORT2
uhash_hashChars(const UHashTok key);
/* Used by UnicodeString to compute its hashcode - Not public API. */
U_CAPI int32_t U_EXPORT2
uhash_hashUCharsN(const UChar *key, int32_t length);
U_CAPI int32_t U_EXPORT2
uhash_hashCharsN(const char *key, int32_t length);
/**
* Generate a case-insensitive hash code for a null-terminated char*
* string. If the string is not null-terminated do not use this
@ -666,13 +654,6 @@ uhash_compareUnicodeString(const UHashTok key1, const UHashTok key2);
U_CAPI UBool U_EXPORT2
uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2);
/**
* Deleter function for UnicodeString* keys or values.
* @param obj The object to be deleted
*/
U_CAPI void U_EXPORT2
uhash_deleteUnicodeString(void *obj);
/********************************************************************
* int32_t Support Functions
********************************************************************/
@ -705,20 +686,7 @@ uhash_compareLong(const UHashTok key1, const UHashTok key2);
U_CAPI void U_EXPORT2
uhash_deleteHashtable(void *obj);
/**
* Deleter for UObject instances.
* @param obj The object to be deleted
*/
U_CAPI void U_EXPORT2
uhash_deleteUObject(void *obj);
/**
* Deleter for any key or value allocated using uprv_malloc. Calls
* uprv_free.
* @param obj The object to be deleted
*/
U_CAPI void U_EXPORT2
uhash_freeBlock(void *obj);
/* Use uprv_free() itself as a deleter for any key or value allocated using uprv_malloc. */
/**
* Checks if the given hash tables are equal or not.

View file

@ -1,6 +1,6 @@
/*
******************************************************************************
* Copyright (C) 1997-2010, International Business Machines
* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* Date Name Description
@ -10,42 +10,7 @@
******************************************************************************
*/
#include "uhash.h"
#include "hash.h"
#include "uvector.h"
#include "unicode/unistr.h"
#include "unicode/uchar.h"
/********************************************************************
* PUBLIC UnicodeString support functions for UHashtable
********************************************************************/
/**
 * Hash function for UnicodeString* keys.
 * @param key token whose pointer member is a UnicodeString*, or NULL
 * @return the string's hashCode(), or 0 for a NULL pointer
 */
U_CAPI int32_t U_EXPORT2
uhash_hashUnicodeString(const UHashTok key) {
    U_NAMESPACE_USE
    const UnicodeString *s = (const UnicodeString*) key.pointer;
    if (s == NULL) {
        return 0;
    }
    return s->hashCode();
}
/**
 * Deleter for UnicodeString* keys or values.
 * @param obj pointer that is cast to UnicodeString* and deleted
 */
U_CAPI void U_EXPORT2
uhash_deleteUnicodeString(void *obj) {
    U_NAMESPACE_USE
    UnicodeString *doomed = (UnicodeString*) obj;
    delete doomed;
}
/**
 * Comparator for UnicodeString* keys.
 * Two tokens compare equal when they hold the same pointer (including
 * two NULLs) or when both are non-NULL and the strings are equal.
 * @param key1 token whose pointer member is a UnicodeString*, or NULL
 * @param key2 token whose pointer member is a UnicodeString*, or NULL
 * @return TRUE if the keys are considered equal
 */
U_CAPI UBool U_EXPORT2
uhash_compareUnicodeString(const UHashTok key1, const UHashTok key2) {
    U_NAMESPACE_USE
    const UnicodeString *lhs = (const UnicodeString*) key1.pointer;
    const UnicodeString *rhs = (const UnicodeString*) key2.pointer;
    if (lhs == rhs) {
        // same object, or both NULL
        return TRUE;
    }
    if (lhs != NULL && rhs != NULL) {
        return *lhs == *rhs;
    }
    // exactly one side is NULL
    return FALSE;
}
/**
* Deleter for Hashtable objects.
@ -56,13 +21,4 @@ uhash_deleteHashtable(void *obj) {
delete (Hashtable*) obj;
}
/**
 * Deleter for UObject instances.
 * @param obj pointer that is cast to UObject* and deleted
 */
U_CAPI void U_EXPORT2
uhash_deleteUObject(void *obj) {
    U_NAMESPACE_USE
    UObject *doomed = (UObject*) obj;
    delete doomed;
}
//eof

View file

@ -1,7 +1,7 @@
/*
******************************************************************************
* *
* Copyright (C) 2001-2010, International Business Machines *
* Copyright (C) 2001-2011, International Business Machines *
* Corporation and others. All Rights Reserved. *
* *
******************************************************************************
@ -19,40 +19,11 @@
#include "unicode/uclean.h"
#include "cmemory.h"
#include "icuplugimp.h"
#include "uassert.h"
#include "ucln.h"
#include "ucln_cmn.h"
#include "ucnv_io.h"
#include "umutex.h"
#include "utracimp.h"
static UBool gICUInitialized = FALSE;
static UMTX gICUInitMutex = NULL;
/************************************************
The cleanup order is important in this function.
Please be sure that you have read ucln.h
************************************************/
/*
 * Library-wide cleanup entry point.
 * In order: runs ucln_lib_cleanup(), destroys the init mutex, calls
 * umtx_cleanup() and cmemory_cleanup(), clears gICUInitialized, and
 * finally calls utrace_cleanup().
 */
U_CAPI void U_EXPORT2
u_cleanup(void)
{
UTRACE_ENTRY_OC(UTRACE_U_CLEANUP);
umtx_lock(NULL); /* Force a memory barrier, so that we are sure to see */
umtx_unlock(NULL); /* all state left around by any other threads. */
ucln_lib_cleanup();
/* The mutexes are torn down only after the library cleanup call above. */
umtx_destroy(&gICUInitMutex);
umtx_cleanup();
cmemory_cleanup(); /* undo any heap functions set by u_setMemoryFunctions(). */
/* Record that ICU is no longer initialized. */
gICUInitialized = FALSE;
UTRACE_EXIT(); /* Must be before utrace_cleanup(), which turns off tracing. */
/*#if U_ENABLE_TRACING*/
utrace_cleanup();
/*#endif*/
}
/*
* ICU Initialization Function. Need not be called.
*/

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2008-2010, International Business Machines
* Copyright (C) 2008-2011, International Business Machines
* Corporation, Google and others. All Rights Reserved.
*
*******************************************************************************
@ -19,8 +19,11 @@
#ifndef __ICU_UCNV_SEL_H__
#define __ICU_UCNV_SEL_H__
#include "unicode/uset.h"
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
#include "unicode/uset.h"
#include "unicode/utf16.h"
#include "unicode/uenum.h"
#include "unicode/ucnv.h"
@ -179,4 +182,6 @@ U_STABLE UEnumeration * U_EXPORT2
ucnvsel_selectForUTF8(const UConverterSelector* sel,
const char *s, int32_t length, UErrorCode *status);
#endif /* !UCONFIG_NO_CONVERSION */
#endif /* __ICU_UCNV_SEL_H__ */

View file

@ -24,6 +24,7 @@ U_NAMESPACE_BEGIN
class BMPSet;
class ParsePosition;
class RBBIRuleScanner;
class SymbolTable;
class UnicodeSetStringSpan;
class UVector;
@ -1467,6 +1468,7 @@ private:
virtual UBool matchesIndexValue(uint8_t v) const;
private:
friend class RBBIRuleScanner;
//----------------------------------------------------------------
// Implementation: Clone as thawed (see ICU4J Freezable)
@ -1478,10 +1480,16 @@ private:
// Implementation: Pattern parsing
//----------------------------------------------------------------
void applyPatternIgnoreSpace(const UnicodeString& pattern,
ParsePosition& pos,
const SymbolTable* symbols,
UErrorCode& status);
void applyPattern(RuleCharacterIterator& chars,
const SymbolTable* symbols,
UnicodeString& rebuiltPat,
uint32_t options,
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
UErrorCode& ec);
//----------------------------------------------------------------

View file

@ -31,6 +31,7 @@
#include "unicode/std_string.h"
#include "unicode/stringpiece.h"
#include "unicode/bytestream.h"
#include "unicode/ucasemap.h"
struct UConverter; // unicode/ucnv.h
class StringThreadTest;
@ -53,6 +54,21 @@ U_STABLE int32_t U_EXPORT2
u_strlen(const UChar *s);
#endif
#ifndef U_STRING_CASE_MAPPER_DEFINED
#define U_STRING_CASE_MAPPER_DEFINED
/**
* Internal string case mapping function type.
* @internal
*/
typedef int32_t U_CALLCONV
UStringCaseMapper(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode);
#endif
U_NAMESPACE_BEGIN
class BreakIterator; // unicode/brkiter.h
@ -3355,12 +3371,13 @@ private:
int32_t **pBufferToDelete = 0,
UBool forceClone = FALSE);
// common function for case mappings
/**
* Common function for UnicodeString case mappings.
* The stringCaseMapper has the same type UStringCaseMapper
* as in ustr_imp.h for ustrcase_map().
*/
UnicodeString &
caseMap(BreakIterator *titleIter,
const char *locale,
uint32_t options,
int32_t toWhichCase);
caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
// ref counting
void addRef(void);

View file

@ -1559,7 +1559,7 @@ UBool UnicodeSet::allocateStrings(UErrorCode &status) {
if (U_FAILURE(status)) {
return FALSE;
}
strings = new UVector(uhash_deleteUnicodeString,
strings = new UVector(uprv_deleteUObject,
uhash_compareUnicodeString, 1, status);
if (strings == NULL) { // Check for memory allocation error.
status = U_MEMORY_ALLOCATION_ERROR;

View file

@ -0,0 +1,280 @@
/*
*******************************************************************************
*
* Copyright (C) 2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: uniset_closure.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2011may30
* created by: Markus W. Scherer
*
* UnicodeSet::closeOver() and related methods moved here from uniset_props.cpp
* to simplify dependencies.
* In particular, this depends on the BreakIterator, but the BreakIterator
* code also builds UnicodeSets from patterns and needs uniset_props.
*/
#include "unicode/brkiter.h"
#include "unicode/locid.h"
#include "unicode/parsepos.h"
#include "unicode/uniset.h"
#include "cmemory.h"
#include "ruleiter.h"
#include "ucase.h"
#include "util.h"
#include "uvector.h"
// initial storage. Must be >= 0
// *** same as in uniset.cpp ! ***
#define START_EXTRA 16
U_NAMESPACE_BEGIN
// TODO memory debugging provided inside uniset.cpp
// could be made available here but probably obsolete with use of modern
// memory leak checker tools
#define _dbgct(me)
//----------------------------------------------------------------
// Constructors &c
//----------------------------------------------------------------
/**
 * Constructs a set from the given pattern with the given options.
 * Nothing is allocated or parsed if status already indicates a failure.
 * @param pattern a string specifying what characters are in the set
 * @param options bitmask of options to apply to the pattern
 * @param symbols symbol table passed through to applyPattern(); may be NULL
 * @param status  error code; set to U_MEMORY_ALLOCATION_ERROR if the
 *                initial list cannot be allocated
 */
UnicodeSet::UnicodeSet(const UnicodeString& pattern,
                       uint32_t options,
                       const SymbolTable* symbols,
                       UErrorCode& status) :
    len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
    fFlags(0)
{
    if (U_SUCCESS(status)) {
        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
        if (list != NULL) {
            // storage is available: set up the strings vector, then parse
            allocateStrings(status);
            applyPattern(pattern, options, symbols, status);
        } else {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }
    _dbgct(this);
}
/**
 * Constructs a set from the pattern text starting at the given parse position.
 * Nothing is allocated or parsed if status already indicates a failure.
 * @param pattern a string specifying what characters are in the set
 * @param pos     parse position passed through to applyPattern()
 * @param options bitmask of options to apply to the pattern
 * @param symbols symbol table passed through to applyPattern(); may be NULL
 * @param status  error code; set to U_MEMORY_ALLOCATION_ERROR if the
 *                initial list cannot be allocated
 */
UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
                       uint32_t options,
                       const SymbolTable* symbols,
                       UErrorCode& status) :
    len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
    fFlags(0)
{
    if (U_SUCCESS(status)) {
        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
        if (list != NULL) {
            // storage is available: set up the strings vector, then parse
            allocateStrings(status);
            applyPattern(pattern, pos, options, symbols, status);
        } else {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }
    _dbgct(this);
}
//----------------------------------------------------------------
// Public API
//----------------------------------------------------------------
/**
 * Applies the whole pattern string to this set.
 * After parsing from index 0, the entire string must have been consumed
 * (trailing white space is skipped first when USET_IGNORE_SPACE is set);
 * otherwise status is set to U_ILLEGAL_ARGUMENT_ERROR.
 * @param pattern a string specifying what characters are in the set
 * @param options bitmask of options to apply to the pattern
 * @param symbols symbol table passed through to the parser; may be NULL
 * @param status  error code
 * @return a reference to this set
 */
UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
                                     uint32_t options,
                                     const SymbolTable* symbols,
                                     UErrorCode& status) {
    ParsePosition parsePos(0);
    applyPattern(pattern, parsePos, options, symbols, status);
    if (U_SUCCESS(status)) {
        int32_t end = parsePos.getIndex();
        if (options & USET_IGNORE_SPACE) {
            // Skip over trailing whitespace
            ICU_Utility::skipWhitespace(pattern, end, TRUE);
        }
        if (end != pattern.length()) {
            // unparsed text remains after the set pattern
            status = U_ILLEGAL_ARGUMENT_ERROR;
        }
    }
    return *this;
}
/**
 * Parses a pattern starting at pos and applies it to this set.
 * Does nothing if status already indicates a failure. A frozen set is
 * left unchanged and status is set to U_NO_WRITE_PERMISSION. If the
 * iterator is still inside a variable after parsing, status is set to
 * U_MALFORMED_SET.
 * @param pattern a string specifying what characters are in the set
 * @param pos     parse position handed to the rule character iterator
 * @param options bitmask of options to apply to the pattern
 * @param symbols symbol table used while parsing; may be NULL
 * @param status  error code
 * @return a reference to this set
 */
UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
                                     ParsePosition& pos,
                                     uint32_t options,
                                     const SymbolTable* symbols,
                                     UErrorCode& status) {
    if (U_SUCCESS(status)) {
        if (isFrozen()) {
            status = U_NO_WRITE_PERMISSION;
        } else {
            // The pattern is rebuilt in a temporary string because the
            // parser calls add() etc., which set pat to empty.
            UnicodeString newPattern;
            RuleCharacterIterator iter(pattern, symbols, pos);
            applyPattern(iter, symbols, newPattern, options, &UnicodeSet::closeOver, status);
            if (U_SUCCESS(status)) {
                if (iter.inVariable()) {
                    // syntaxError(chars, "Extra chars in variable value");
                    status = U_MALFORMED_SET;
                } else {
                    setPattern(newPattern);
                }
            }
        }
    }
    return *this;
}
// USetAdder implementation
// Does not use uset.h to reduce code dependencies
// USetAdder callback: adds the single code point c to the UnicodeSet
// that the USet pointer refers to.
static void U_CALLCONV
_set_add(USet *set, UChar32 c) {
    UnicodeSet *target = (UnicodeSet *)set;
    target->add(c);
}
// USetAdder callback: adds the code point range start..end to the
// UnicodeSet that the USet pointer refers to.
static void U_CALLCONV
_set_addRange(USet *set, UChar32 start, UChar32 end) {
    UnicodeSet *target = (UnicodeSet *)set;
    target->add(start, end);
}
// USetAdder callback: adds the string str/length to the UnicodeSet that
// the USet pointer refers to. The UnicodeString is constructed with a
// first argument that is TRUE exactly when length is negative.
static void U_CALLCONV
_set_addString(USet *set, const UChar *str, int32_t length) {
    UnicodeSet *target = (UnicodeSet *)set;
    const UnicodeString wrapped((UBool)(length<0), str, length);
    target->add(wrapped);
}
//----------------------------------------------------------------
// Case folding API
//----------------------------------------------------------------
// Adds the outcome of one full case mapping to the set.
//   result < 0:                        the code point mapped to itself, nothing to add (see ucase.h)
//   result <= UCASE_MAX_STRING_LENGTH: string mapping of that length, stored in full
//   otherwise:                         result is a single code point
// str is scratch storage supplied by the caller so that no temporary
// string needs to be constructed here.
static inline void
addCaseMapping(UnicodeSet &set, int32_t result, const UChar *full, UnicodeString &str) {
    if (result < 0) {
        return;
    }
    if (result <= UCASE_MAX_STRING_LENGTH) {
        // string case mapping from full with length result
        str.setTo((UBool)FALSE, full, result);
        set.add(str);
    } else {
        // single-code point case mapping
        set.add(result);
    }
}
/**
 * Closes this set over the given attribute.
 *
 * With USET_CASE_INSENSITIVE the full case closure of every code point
 * and string is added; strings are replaced by their case-folded forms
 * (or by the code points they close over). With USET_ADD_CASE_MAPPINGS
 * the lower/title/upper/fold mappings of every code point and string
 * are added. Any other attribute value leaves the set unchanged.
 *
 * The result is built in a copy of this set and assigned back at the end.
 * A frozen or bogus set is returned unmodified.
 *
 * @param attribute bitmask; USET_CASE_INSENSITIVE and
 *                  USET_ADD_CASE_MAPPINGS are recognized
 * @return a reference to this set
 */
UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
    if (isFrozen() || isBogus()) {
        return *this;
    }
    if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) {
        const UCaseProps *csp = ucase_getSingleton();
        {
            UnicodeSet foldSet(*this);
            UnicodeString str;
            USetAdder sa = {
                foldSet.toUSet(),
                _set_add,
                _set_addRange,
                _set_addString,
                NULL, // don't need remove()
                NULL // don't need removeRange()
            };

            // start with input set to guarantee inclusion
            // USET_CASE: remove strings because the strings will actually be reduced (folded);
            //            therefore, start with no strings and add only those needed
            // The strings vector of the copy may be NULL if its allocation
            // failed, so check it before dereferencing.
            if ((attribute & USET_CASE_INSENSITIVE) && foldSet.strings != NULL) {
                foldSet.strings->removeAllElements();
            }

            int32_t n = getRangeCount();
            UChar32 result;
            const UChar *full;
            int32_t locCache = 0;

            for (int32_t i=0; i<n; ++i) {
                UChar32 start = getRangeStart(i);
                UChar32 end   = getRangeEnd(i);

                if (attribute & USET_CASE_INSENSITIVE) {
                    // full case closure
                    for (UChar32 cp=start; cp<=end; ++cp) {
                        ucase_addCaseClosure(csp, cp, &sa);
                    }
                } else {
                    // add case mappings
                    // (does not add long s for regular s, or Kelvin for k, for example)
                    for (UChar32 cp=start; cp<=end; ++cp) {
                        result = ucase_toFullLower(csp, cp, NULL, NULL, &full, "", &locCache);
                        addCaseMapping(foldSet, result, full, str);

                        result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, "", &locCache);
                        addCaseMapping(foldSet, result, full, str);

                        result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, "", &locCache);
                        addCaseMapping(foldSet, result, full, str);

                        result = ucase_toFullFolding(csp, cp, &full, 0);
                        addCaseMapping(foldSet, result, full, str);
                    }
                }
            }
            if (strings != NULL && strings->size() > 0) {
                if (attribute & USET_CASE_INSENSITIVE) {
                    for (int32_t j=0; j<strings->size(); ++j) {
                        str = *(const UnicodeString *) strings->elementAt(j);
                        str.foldCase();
                        if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) {
                            foldSet.add(str); // does not map to code points: add the folded string itself
                        }
                    }
                } else {
                    Locale root("");
#if !UCONFIG_NO_BREAK_ITERATION
                    UErrorCode status = U_ZERO_ERROR;
                    BreakIterator *bi = BreakIterator::createWordInstance(root, status);
                    if (U_SUCCESS(status)) {
#endif
                        const UnicodeString *pStr;

                        for (int32_t j=0; j<strings->size(); ++j) {
                            pStr = (const UnicodeString *) strings->elementAt(j);
                            (str = *pStr).toLower(root);
                            foldSet.add(str);
#if !UCONFIG_NO_BREAK_ITERATION
                            (str = *pStr).toTitle(bi, root);
                            foldSet.add(str);
#endif
                            (str = *pStr).toUpper(root);
                            foldSet.add(str);
                            (str = *pStr).foldCase();
                            foldSet.add(str);
                        }
#if !UCONFIG_NO_BREAK_ITERATION
                    }
                    delete bi;
#endif
                }
            }
            // replace this set's contents with the closed-over copy
            *this = foldSet;
        }
    }
    return *this;
}
U_NAMESPACE_END

View file

@ -331,65 +331,15 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern,
len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
fFlags(0)
{
if(U_SUCCESS(status)){
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
/* test for NULL */
if(list == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
}else{
allocateStrings(status);
applyPattern(pattern, USET_IGNORE_SPACE, NULL, status);
}
}
_dbgct(this);
}
/**
* Constructs a set from the given pattern, optionally ignoring
* white space. See the class description for the syntax of the
* pattern language.
* @param pattern a string specifying what characters are in the set
* @param options bitmask for options to apply to the pattern.
* Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
*/
UnicodeSet::UnicodeSet(const UnicodeString& pattern,
uint32_t options,
const SymbolTable* symbols,
UErrorCode& status) :
len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
fFlags(0)
{
if(U_SUCCESS(status)){
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
/* test for NULL */
if(list == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
}else{
allocateStrings(status);
applyPattern(pattern, options, symbols, status);
}
}
_dbgct(this);
}
UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
uint32_t options,
const SymbolTable* symbols,
UErrorCode& status) :
len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
fFlags(0)
{
if(U_SUCCESS(status)){
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
/* test for NULL */
if(list == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
status = U_MEMORY_ALLOCATION_ERROR;
}else{
allocateStrings(status);
applyPattern(pattern, pos, options, symbols, status);
applyPattern(pattern, status);
}
}
_dbgct(this);
@ -401,64 +351,46 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
UErrorCode& status) {
return applyPattern(pattern, USET_IGNORE_SPACE, NULL, status);
}
/**
* Modifies this set to represent the set specified by the given
* pattern, optionally ignoring white space. See the class
* description for the syntax of the pattern language.
* @param pattern a string specifying what characters are in the set
* @param options bitmask for options to apply to the pattern.
* Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
*/
UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
uint32_t options,
const SymbolTable* symbols,
UErrorCode& status) {
if (U_FAILURE(status) || isFrozen()) {
return *this;
}
// Equivalent to
// return applyPattern(pattern, USET_IGNORE_SPACE, NULL, status);
// but without dependency on closeOver().
ParsePosition pos(0);
applyPattern(pattern, pos, options, symbols, status);
applyPatternIgnoreSpace(pattern, pos, NULL, status);
if (U_FAILURE(status)) return *this;
int32_t i = pos.getIndex();
if (options & USET_IGNORE_SPACE) {
// Skip over trailing whitespace
ICU_Utility::skipWhitespace(pattern, i, TRUE);
}
// Skip over trailing whitespace
ICU_Utility::skipWhitespace(pattern, i, TRUE);
if (i != pattern.length()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
}
return *this;
}
UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
ParsePosition& pos,
uint32_t options,
const SymbolTable* symbols,
UErrorCode& status) {
if (U_FAILURE(status) || isFrozen()) {
return *this;
void
UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern,
ParsePosition& pos,
const SymbolTable* symbols,
UErrorCode& status) {
if (U_FAILURE(status)) {
return;
}
if (isFrozen()) {
status = U_NO_WRITE_PERMISSION;
return;
}
// Need to build the pattern in a temporary string because
// _applyPattern calls add() etc., which set pat to empty.
UnicodeString rebuiltPat;
RuleCharacterIterator chars(pattern, symbols, pos);
applyPattern(chars, symbols, rebuiltPat, options, status);
if (U_FAILURE(status)) return *this;
applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, status);
if (U_FAILURE(status)) return;
if (chars.inVariable()) {
// syntaxError(chars, "Extra chars in variable value");
status = U_MALFORMED_SET;
return *this;
return;
}
setPattern(rebuiltPat);
return *this;
}
/**
@ -511,6 +443,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
const SymbolTable* symbols,
UnicodeString& rebuiltPat,
uint32_t options,
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
UErrorCode& ec) {
if (U_FAILURE(ec)) return;
@ -647,7 +580,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
}
switch (setMode) {
case 1:
nested->applyPattern(chars, symbols, patLocal, options, ec);
nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec);
break;
case 2:
chars.skipIgnored(opts);
@ -879,10 +812,10 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
* patterns like /[^abc]/i work.
*/
if ((options & USET_CASE_INSENSITIVE) != 0) {
closeOver(USET_CASE_INSENSITIVE);
(this->*caseClosure)(USET_CASE_INSENSITIVE);
}
else if ((options & USET_ADD_CASE_MAPPINGS) != 0) {
closeOver(USET_ADD_CASE_MAPPINGS);
(this->*caseClosure)(USET_ADD_CASE_MAPPINGS);
}
if (invert) {
complement();
@ -1365,126 +1298,4 @@ void UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars,
rebuiltPat.append(pattern, 0, pos.getIndex());
}
//----------------------------------------------------------------
// Case folding API
//----------------------------------------------------------------
// Adds the outcome of one full case mapping to the set.
//   result < 0:                        the code point mapped to itself, nothing to add (see ucase.h)
//   result <= UCASE_MAX_STRING_LENGTH: string mapping of that length, stored in full
//   otherwise:                         result is a single code point
// str is scratch storage supplied by the caller so that no temporary
// string needs to be constructed here.
static inline void
addCaseMapping(UnicodeSet &set, int32_t result, const UChar *full, UnicodeString &str) {
    if (result < 0) {
        return;
    }
    if (result <= UCASE_MAX_STRING_LENGTH) {
        // string case mapping from full with length result
        str.setTo((UBool)FALSE, full, result);
        set.add(str);
    } else {
        // single-code point case mapping
        set.add(result);
    }
}
/**
 * Closes this set over case relationships, in place.
 *
 * Does nothing when the set is frozen or bogus, or when attribute contains
 * neither USET_CASE_INSENSITIVE nor USET_ADD_CASE_MAPPINGS.
 *
 * USET_CASE_INSENSITIVE: every code point gets its full case closure
 * (ucase_addCaseClosure); every string is case-folded and either replaced by
 * the code points it maps to (ucase_addStringCaseClosure) or added in folded form.
 *
 * Otherwise (USET_ADD_CASE_MAPPINGS): the lower/title/upper/fold mappings of
 * every code point and of every string are added to the original contents.
 * String titlecasing is skipped when UCONFIG_NO_BREAK_ITERATION is set; if the
 * word BreakIterator cannot be created, no string mappings are added at all.
 *
 * @param attribute bit set of USET_CASE_INSENSITIVE and/or USET_ADD_CASE_MAPPINGS
 * @return *this
 */
UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
    if (isFrozen() || isBogus()) {
        return *this;
    }
    if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) {
        const UCaseProps *csp = ucase_getSingleton();
        {
            // Work on a copy; the result is assigned back only at the end.
            UnicodeSet foldSet(*this);
            UnicodeString str;
            USetAdder sa = {
                foldSet.toUSet(),
                _set_add,
                _set_addRange,
                _set_addString,
                NULL, // don't need remove()
                NULL // don't need removeRange()
            };

            // start with input set to guarantee inclusion
            // USET_CASE: remove strings because the strings will actually be reduced (folded);
            //            therefore, start with no strings and add only those needed
            // The string list pointer is checked for NULL here, like this->strings below,
            // so that a set without a string list is not dereferenced.
            if ((attribute & USET_CASE_INSENSITIVE) && foldSet.strings != NULL) {
                foldSet.strings->removeAllElements();
            }

            int32_t n = getRangeCount();
            UChar32 result;
            const UChar *full;
            int32_t locCache = 0;

            for (int32_t i=0; i<n; ++i) {
                UChar32 start = getRangeStart(i);
                UChar32 end   = getRangeEnd(i);

                if (attribute & USET_CASE_INSENSITIVE) {
                    // full case closure
                    for (UChar32 cp=start; cp<=end; ++cp) {
                        ucase_addCaseClosure(csp, cp, &sa);
                    }
                } else {
                    // add case mappings
                    // (does not add long s for regular s, or Kelvin for k, for example)
                    for (UChar32 cp=start; cp<=end; ++cp) {
                        result = ucase_toFullLower(csp, cp, NULL, NULL, &full, "", &locCache);
                        addCaseMapping(foldSet, result, full, str);

                        result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, "", &locCache);
                        addCaseMapping(foldSet, result, full, str);

                        result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, "", &locCache);
                        addCaseMapping(foldSet, result, full, str);

                        result = ucase_toFullFolding(csp, cp, &full, 0);
                        addCaseMapping(foldSet, result, full, str);
                    }
                }
            }
            if (strings != NULL && strings->size() > 0) {
                if (attribute & USET_CASE_INSENSITIVE) {
                    for (int32_t j=0; j<strings->size(); ++j) {
                        str = *(const UnicodeString *) strings->elementAt(j);
                        str.foldCase();
                        if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) {
                            foldSet.add(str); // does not map to code points: add the folded string itself
                        }
                    }
                } else {
                    Locale root("");
#if !UCONFIG_NO_BREAK_ITERATION
                    UErrorCode status = U_ZERO_ERROR;
                    BreakIterator *bi = BreakIterator::createWordInstance(root, status);
                    if (U_SUCCESS(status)) {
#endif
                        const UnicodeString *pStr;

                        for (int32_t j=0; j<strings->size(); ++j) {
                            pStr = (const UnicodeString *) strings->elementAt(j);
                            (str = *pStr).toLower(root);
                            foldSet.add(str);
#if !UCONFIG_NO_BREAK_ITERATION
                            (str = *pStr).toTitle(bi, root);
                            foldSet.add(str);
#endif
                            (str = *pStr).toUpper(root);
                            foldSet.add(str);
                            (str = *pStr).foldCase();
                            foldSet.add(str);
                        }
#if !UCONFIG_NO_BREAK_ITERATION
                    }
                    // deleted outside the success check, so it is released on either path
                    delete bi;
#endif
                }
            }
            *this = foldSet;
        }
    }
    return *this;
}
U_NAMESPACE_END

View file

@ -1482,7 +1482,7 @@ UnicodeString::doHashCode() const
{
/* Delegate hash computation to uhash. This makes UnicodeString
* hashing consistent with UChar* hashing. */
int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length());
int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
if (hashCode == kInvalidHashCode) {
hashCode = kEmptyHashCode;
}
@ -1674,6 +1674,29 @@ UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
U_NAMESPACE_END
U_NAMESPACE_USE
// Hashes a key whose pointer refers to a UnicodeString:
// 0 for a NULL pointer, otherwise the string's hashCode().
U_CAPI int32_t U_EXPORT2
uhash_hashUnicodeString(const UHashTok key) {
    const UnicodeString *s = (const UnicodeString*) key.pointer;
    if (s == NULL) {
        return 0;
    }
    return s->hashCode();
}
// Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
// does not depend on hashtable code.
// Compares two keys whose pointers refer to UnicodeStrings: identical pointers
// (including two NULLs) are equal, exactly one NULL is unequal,
// otherwise the string contents are compared.
U_CAPI UBool U_EXPORT2
uhash_compareUnicodeString(const UHashTok key1, const UHashTok key2) {
    const UnicodeString *lhs = (const UnicodeString*) key1.pointer;
    const UnicodeString *rhs = (const UnicodeString*) key2.pointer;
    if (lhs == rhs) {
        return TRUE;
    }
    if (lhs != NULL && rhs != NULL) {
        return *lhs == *rhs;
    }
    return FALSE;
}
#ifdef U_STATIC_IMPLEMENTATION
/*
This should never be called. It is defined here to make sure that the
@ -1683,7 +1706,6 @@ but defining it here makes sure that it is included with this object file.
This makes sure that static library dependencies are kept to a minimum.
*/
static void uprv_UnicodeStringDummy(void) {
U_NAMESPACE_USE
delete [] (new UnicodeString[2]);
}
#endif

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2010, International Business Machines
* Copyright (C) 1999-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -18,13 +18,11 @@
#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "unicode/locid.h"
#include "cstring.h"
#include "cmemory.h"
#include "unicode/ustring.h"
#include "unicode/unistr.h"
#include "unicode/uchar.h"
#include "unicode/ubrk.h"
#include "ustr_imp.h"
#include "uhash.h"
@ -84,23 +82,14 @@ UnicodeString::doCaseCompare(int32_t start,
// Write implementation
//========================================
/*
* Implement argument checking and buffer handling
* for string case mapping as a common function.
*/
UnicodeString &
UnicodeString::caseMap(BreakIterator *titleIter,
const char *locale,
uint32_t options,
int32_t toWhichCase) {
UnicodeString::caseMap(const UCaseMap *csm,
UStringCaseMapper *stringCaseMapper) {
if(isEmpty() || !isWritable()) {
// nothing to do
return *this;
}
const UCaseProps *csp=ucase_getSingleton();
// We need to allocate a new buffer for the internal string case mapping function.
// This is very similar to how doReplace() keeps the old array pointer
// and deletes the old array itself after it is done.
@ -135,28 +124,8 @@ UnicodeString::caseMap(BreakIterator *titleIter,
int32_t newLength;
do {
errorCode = U_ZERO_ERROR;
if(toWhichCase==TO_LOWER) {
newLength = ustr_toLower(csp, getArrayStart(), getCapacity(),
oldArray, oldLength,
locale, &errorCode);
} else if(toWhichCase==TO_UPPER) {
newLength = ustr_toUpper(csp, getArrayStart(), getCapacity(),
oldArray, oldLength,
locale, &errorCode);
} else if(toWhichCase==TO_TITLE) {
#if UCONFIG_NO_BREAK_ITERATION
errorCode=U_UNSUPPORTED_ERROR;
#else
newLength = ustr_toTitle(csp, getArrayStart(), getCapacity(),
oldArray, oldLength,
(UBreakIterator *)titleIter, locale, options, &errorCode);
#endif
} else {
newLength = ustr_foldCase(csp, getArrayStart(), getCapacity(),
oldArray, oldLength,
options,
&errorCode);
}
newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(),
oldArray, oldLength, &errorCode);
setLength(newLength);
} while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
@ -169,49 +138,12 @@ UnicodeString::caseMap(BreakIterator *titleIter,
return *this;
}
// Lowercases this string in place, using the name of the default Locale.
UnicodeString &
UnicodeString::toLower() {
    const char *localeID = Locale::getDefault().getName();
    return caseMap(0, localeID, 0, TO_LOWER);
}
// Lowercases this string in place, using the name of the given locale.
UnicodeString &
UnicodeString::toLower(const Locale &locale) {
    const char *localeID = locale.getName();
    return caseMap(0, localeID, 0, TO_LOWER);
}
// Uppercases this string in place, using the name of the default Locale.
UnicodeString &
UnicodeString::toUpper() {
    const char *localeID = Locale::getDefault().getName();
    return caseMap(0, localeID, 0, TO_UPPER);
}
// Uppercases this string in place, using the name of the given locale.
UnicodeString &
UnicodeString::toUpper(const Locale &locale) {
    const char *localeID = locale.getName();
    return caseMap(0, localeID, 0, TO_UPPER);
}
#if !UCONFIG_NO_BREAK_ITERATION
// Titlecases this string in place with the given iterator,
// the name of the default Locale, and no options.
UnicodeString &
UnicodeString::toTitle(BreakIterator *titleIter) {
    const char *localeID = Locale::getDefault().getName();
    return caseMap(titleIter, localeID, 0, TO_TITLE);
}
// Titlecases this string in place with the given iterator and locale, and no options.
UnicodeString &
UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
    const char *localeID = locale.getName();
    return caseMap(titleIter, localeID, 0, TO_TITLE);
}
// Titlecases this string in place with the given iterator, locale and options.
UnicodeString &
UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) {
    const char *localeID = locale.getName();
    return caseMap(titleIter, localeID, options, TO_TITLE);
}
#endif
UnicodeString &
UnicodeString::foldCase(uint32_t options) {
/* The Locale parameter isn't used. Use "" instead. */
return caseMap(0, "", options, FOLD_CASE);
UCaseMap csm=UCASEMAP_INITIALIZER;
csm.csp=ucase_getSingleton();
csm.options=options;
return caseMap(&csm, ustrcase_internalFold);
}
U_NAMESPACE_END
@ -244,4 +176,3 @@ uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2) {
}
return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
}

View file

@ -0,0 +1,70 @@
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: unistr_case_locale.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2011may31
* created by: Markus W. Scherer
*
* Locale-sensitive case mapping functions (ones that call uloc_getDefault())
* were moved here to break dependency cycles among parts of the common library.
*/
#include "unicode/utypes.h"
#include "unicode/locid.h"
#include "unicode/unistr.h"
#include "cmemory.h"
#include "ustr_imp.h"
U_NAMESPACE_BEGIN
//========================================
// Write implementation
//========================================
/*
 * Fills in a temporary, otherwise empty UCaseMap for API functions that take no UCaseMap:
 * sets the case properties singleton if none is set, and the locale field.
 * Kept small and inline because it runs on every such call.
 */
static inline void
setTempCaseMap(UCaseMap *csm, const char *locale) {
    if(csm->csp==NULL) {
        csm->csp=ucase_getSingleton();
    }
    if(locale==NULL || locale[0]!=0) {
        // NULL or non-empty locale ID: let the shared helper work out the locale field
        ustrcase_setTempCaseMapLocale(csm, locale);
    } else {
        // explicit empty locale ID
        csm->locale[0]=0;
    }
}
// Lowercases this string in place according to the default Locale.
UnicodeString &
UnicodeString::toLower() {
    const Locale &defaultLocale=Locale::getDefault();
    return toLower(defaultLocale);
}
// Lowercases this string in place, using a temporary UCaseMap set up for the given locale.
UnicodeString &
UnicodeString::toLower(const Locale &locale) {
    UCaseMap tempMap=UCASEMAP_INITIALIZER;
    const char *localeID=locale.getName();
    setTempCaseMap(&tempMap, localeID);
    return caseMap(&tempMap, ustrcase_internalToLower);
}
// Uppercases this string in place according to the default Locale.
UnicodeString &
UnicodeString::toUpper() {
    const Locale &defaultLocale=Locale::getDefault();
    return toUpper(defaultLocale);
}
// Uppercases this string in place, using a temporary UCaseMap set up for the given locale.
UnicodeString &
UnicodeString::toUpper(const Locale &locale) {
    UCaseMap tempMap=UCASEMAP_INITIALIZER;
    const char *localeID=locale.getName();
    setTempCaseMap(&tempMap, localeID);
    return caseMap(&tempMap, ustrcase_internalToUpper);
}
U_NAMESPACE_END

View file

@ -0,0 +1,90 @@
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: unistr_titlecase_brkiter.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:2
*
* created on: 2011may30
* created by: Markus W. Scherer
*
* Titlecasing functions that are based on BreakIterator
* were moved here to break dependency cycles among parts of the common library.
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/brkiter.h"
#include "unicode/ubrk.h"
#include "unicode/unistr.h"
#include "unicode/ustring.h"
#include "cmemory.h"
#include "ustr_imp.h"
// UStringCaseMapper-shaped adapter for UnicodeString::toTitle():
// points the UCaseMap's break iterator at src, then runs the internal titlecasing.
static int32_t U_CALLCONV
unistr_case_internalToTitle(const UCaseMap *csm,
                            UChar *dest, int32_t destCapacity,
                            const UChar *src, int32_t srcLength,
                            UErrorCode *pErrorCode) {
  UBreakIterator *iter=csm->iter;
  ubrk_setText(iter, src, srcLength, pErrorCode);
  const int32_t length=
      ustrcase_internalToTitle(csm, dest, destCapacity, src, srcLength, pErrorCode);
  return length;
}
/*
 * Fills in a temporary, otherwise empty UCaseMap for API functions that take no UCaseMap.
 * Kept small and inline because it runs on every such call.
 */
static inline void
setTempCaseMap(UCaseMap *csm, const char *locale) {
  if(csm->csp==NULL) {
    csm->csp=ucase_getSingleton();
  }
  const UBool isEmptyLocaleID=(locale!=NULL && locale[0]==0);
  if(isEmptyLocaleID) {
    csm->locale[0]=0;
    return;
  }
  // NULL or non-empty locale ID
  ustrcase_setTempCaseMapLocale(csm, locale);
}
U_NAMESPACE_BEGIN
// Titlecases this string in place with the default Locale and no options.
UnicodeString &
UnicodeString::toTitle(BreakIterator *titleIter) {
  const Locale &defaultLocale=Locale::getDefault();
  return toTitle(titleIter, defaultLocale, 0);
}
// Titlecases this string in place with the given locale and no options.
UnicodeString &
UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
  const uint32_t noOptions=0;
  return toTitle(titleIter, locale, noOptions);
}
// Titlecases this string in place.
// When titleIter is NULL, a word BreakIterator for locale is created for this
// call and deleted before returning; if it cannot be created, the string is
// set to bogus. A caller-supplied iterator is used as is and not deleted.
UnicodeString &
UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) {
  UCaseMap csm=UCASEMAP_INITIALIZER;
  csm.options=options;
  setTempCaseMap(&csm, locale.getName());

  const UBool ownsIter=(titleIter==NULL);
  BreakIterator *iter=titleIter;
  if(ownsIter) {
    UErrorCode status=U_ZERO_ERROR;
    iter=BreakIterator::createWordInstance(locale, status);
    if(U_FAILURE(status)) {
      setToBogus();
      return *this;
    }
  }

  // UCaseMap stores the iterator as a UBreakIterator pointer.
  csm.iter=reinterpret_cast<UBreakIterator *>(iter);
  caseMap(&csm, unistr_case_internalToTitle);

  if(ownsIter) {
    delete iter;
  }
  return *this;
}
U_NAMESPACE_END
#endif // !UCONFIG_NO_BREAK_ITERATION

View file

@ -1,7 +1,7 @@
/*
******************************************************************************
*
* Copyright (C) 2002-2008, International Business Machines
* Copyright (C) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@ -111,4 +111,9 @@ UObject::~UObject() {}
U_NAMESPACE_END
U_NAMESPACE_USE
// Deleter with a void * parameter: treats obj as a UObject and deletes it.
U_CAPI void U_EXPORT2
uprv_deleteUObject(void *obj) {
    UObject *object = static_cast<UObject *>(obj);
    delete object;
}

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2002-2010, International Business Machines
* Copyright (C) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -288,32 +288,6 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
}
}
#if !UCONFIG_NO_NORMALIZATION
// Returns the combining class of c from the NFC Normalizer2Impl,
// or 0 if that implementation cannot be obtained.
U_CAPI uint8_t U_EXPORT2
u_getCombiningClass(UChar32 c) {
    UErrorCode errorCode=U_ZERO_ERROR;
    const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(errorCode);
    if(U_FAILURE(errorCode)) {
        return 0;
    }
    return nfcImpl->getCC(nfcImpl->getNorm16(c));
}
// Looks up the 16-bit FCD trie value for c, or returns 0 if the trie cannot be obtained.
static uint16_t
getFCD16(UChar32 c) {
    UErrorCode errorCode=U_ZERO_ERROR;
    const UTrie2 *fcdTrie=Normalizer2Factory::getFCDTrie(errorCode);
    if(U_FAILURE(errorCode)) {
        return 0;
    }
    return UTRIE2_GET16(fcdTrie, c);
}
#endif
struct IntProperty;
typedef int32_t IntPropertyGetValue(const IntProperty &prop, UChar32 c, UProperty which);
@ -427,7 +401,7 @@ static int32_t getLeadCombiningClass(const IntProperty &, UChar32, UProperty) {
}
#else
static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
return getFCD16(c)>>8;
return unorm_getFCD16Simple(c)>>8;
}
#endif
@ -437,7 +411,7 @@ static int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) {
}
#else
static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
return getFCD16(c)&0xff;
return unorm_getFCD16Simple(c)&0xff;
}
#endif

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 1997-2010, International Business Machines
* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@ -95,16 +95,3 @@ uscript_getCode(const char* nameOrAbbrOrLocale,
}
return numFilled;
}
/* Returns the long property value name of the UCHAR_SCRIPT value scriptCode. */
U_CAPI const char* U_EXPORT2
uscript_getName(UScriptCode scriptCode){
    const char *longName = u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, U_LONG_PROPERTY_NAME);
    return longName;
}
/* Returns the short property value name of the UCHAR_SCRIPT value scriptCode. */
U_CAPI const char* U_EXPORT2
uscript_getShortName(UScriptCode scriptCode){
    const char *shortName = u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, U_SHORT_PROPERTY_NAME);
    return shortName;
}

View file

@ -149,11 +149,6 @@ uset_clear(USet* set) {
((UnicodeSet*) set)->UnicodeSet::clear();
}
// C wrapper: forwards to UnicodeSet::closeOver() on the set behind the USet pointer.
U_CAPI void U_EXPORT2
uset_closeOver(USet* set, int32_t attributes) {
    UnicodeSet *uniset = (UnicodeSet*) set;
    uniset->UnicodeSet::closeOver(attributes);
}
U_CAPI void U_EXPORT2
uset_removeAllStrings(USet* set) {
((UnicodeSet*) set)->UnicodeSet::removeAllStrings();

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2002-2006, International Business Machines
* Copyright (C) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -38,7 +38,7 @@ uset_openPattern(const UChar* pattern, int32_t patternLength,
*ec = U_MEMORY_ALLOCATION_ERROR;
return 0;
}
if (U_FAILURE(*ec)) {
delete set;
set = NULL;
@ -58,7 +58,7 @@ uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
*ec = U_MEMORY_ALLOCATION_ERROR;
return 0;
}
if (U_FAILURE(*ec)) {
delete set;
set = NULL;
@ -134,3 +134,8 @@ uset_toPattern(const USet* set,
((const UnicodeSet*) set)->toPattern(pat, escapeUnprintable);
return pat.extract(result, resultCapacity, *ec);
}
// C wrapper: forwards to UnicodeSet::closeOver() on the set behind the USet pointer.
U_CAPI void U_EXPORT2
uset_closeOver(USet* set, int32_t attributes) {
    UnicodeSet *uniset = (UnicodeSet*) set;
    uniset->UnicodeSet::closeOver(attributes);
}

View file

@ -19,7 +19,7 @@
#include "unicode/uiter.h"
#include "ucase.h"
/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. */
/** Simple declaration to avoid including unicode/ubrk.h. */
#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
# define UBRK_TYPEDEF_UBREAK_ITERATOR
typedef struct UBreakIterator UBreakIterator;
@ -105,48 +105,108 @@ typedef struct UCaseMap UCaseMap;
# define UCASEMAP_INITIALIZER { NULL, NULL, { 0 }, 0, 0 }
#endif
enum {
TO_LOWER,
TO_UPPER,
TO_TITLE,
FOLD_CASE
};
U_CFUNC void
ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale);
U_CFUNC int32_t
ustr_toLower(const UCaseProps *csp,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode);
#ifndef U_STRING_CASE_MAPPER_DEFINED
#define U_STRING_CASE_MAPPER_DEFINED
U_CFUNC int32_t
ustr_toUpper(const UCaseProps *csp,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode);
#if !UCONFIG_NO_BREAK_ITERATION
U_CFUNC int32_t
ustr_toTitle(const UCaseProps *csp,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UBreakIterator *titleIter,
const char *locale, uint32_t options,
UErrorCode *pErrorCode);
/**
* String case mapping function type, used by ustrcase_map().
* All error checking must be done.
* The UCaseMap must be fully initialized, with locale and/or iter set as needed.
* src and dest must not overlap.
*/
typedef int32_t U_CALLCONV
UStringCaseMapper(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode);
#endif
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToLower(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode);
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToUpper(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode);
#if !UCONFIG_NO_BREAK_ITERATION
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToTitle(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode);
#endif
/** Implements UStringCaseMapper. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalFold(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode);
/**
* Internal case folding function.
* Implements argument checking and buffer handling
* for string case mapping as a common function.
*/
U_CFUNC int32_t
ustr_foldCase(const UCaseProps *csp,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
uint32_t options,
UErrorCode *pErrorCode);
ustrcase_map(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UStringCaseMapper *stringCaseMapper,
UErrorCode *pErrorCode);
/**
* UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
* UTF-8 version of UStringCaseMapper.
* All error checking must be done.
* The UCaseMap must be fully initialized, with locale and/or iter set as needed.
* src and dest must not overlap.
*/
typedef int32_t U_CALLCONV
UTF8CaseMapper(const UCaseMap *csm,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
UErrorCode *pErrorCode);
/** Implements UTF8CaseMapper. */
U_CFUNC int32_t U_CALLCONV
ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
UErrorCode *pErrorCode);
/**
* Implements argument checking and buffer handling
* for UTF-8 string case mapping as a common function.
*/
U_CFUNC int32_t
ucasemap_mapUTF8(const UCaseMap *csm,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
UTF8CaseMapper *stringCaseMapper,
UErrorCode *pErrorCode);
U_CAPI int32_t U_EXPORT2
ustr_hashUCharsN(const UChar *str, int32_t length);
U_CAPI int32_t U_EXPORT2
ustr_hashCharsN(const char *str, int32_t length);
U_CAPI int32_t U_EXPORT2
ustr_hashICharsN(const char *str, int32_t length);
/**
* NUL-terminate a UChar * string if possible.

View file

@ -0,0 +1,91 @@
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: ustr_titlecase_brkiter.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2011may30
* created by: Markus W. Scherer
*
* Titlecasing functions that are based on BreakIterator
* were moved here to break dependency cycles among parts of the common library.
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/brkiter.h"
#include "unicode/ubrk.h"
#include "unicode/ucasemap.h"
#include "cmemory.h"
#include "ucase.h"
#include "ustr_imp.h"
/* functions available in the common library (for unistr_case.cpp) */
/*
 * Fills in a temporary, otherwise empty UCaseMap for API functions that take no UCaseMap.
 * Kept small because it runs on every such call; this file carries its own
 * copy of the helper so that it stays inline.
 */
static inline void
setTempCaseMap(UCaseMap *csm, const char *locale) {
    if(csm->csp==NULL) {
        csm->csp=ucase_getSingleton();
    }
    if(locale==NULL || locale[0]!=0) {
        // NULL or non-empty locale ID
        ustrcase_setTempCaseMapLocale(csm, locale);
    } else {
        // explicit empty locale ID
        csm->locale[0]=0;
    }
}
/* public API functions */
/*
 * Titlecases src into dest through a temporary UCaseMap set up for locale.
 * A caller-supplied titleIter is pointed at src and left open; when titleIter
 * is NULL, a UBRK_WORD iterator is opened over src and closed before returning.
 */
U_CAPI int32_t U_EXPORT2
u_strToTitle(UChar *dest, int32_t destCapacity,
             const UChar *src, int32_t srcLength,
             UBreakIterator *titleIter,
             const char *locale,
             UErrorCode *pErrorCode) {
    UCaseMap csm=UCASEMAP_INITIALIZER;
    setTempCaseMap(&csm, locale);

    const UBool ownsIter=(titleIter==NULL);
    if(ownsIter) {
        csm.iter=ubrk_open(UBRK_WORD, csm.locale, src, srcLength, pErrorCode);
    } else {
        csm.iter=titleIter;
        ubrk_setText(csm.iter, src, srcLength, pErrorCode);
    }

    int32_t length=ustrcase_map(&csm,
                                dest, destCapacity,
                                src, srcLength,
                                ustrcase_internalToTitle, pErrorCode);

    if(ownsIter && csm.iter!=NULL) {
        ubrk_close(csm.iter);
    }
    return length;
}
/*
 * Titlecases src into dest with the caller's UCaseMap.
 * If the UCaseMap has no break iterator yet, a UBRK_WORD iterator is opened
 * over src and stored in csm->iter; this function does not close it.
 * Otherwise the existing iterator is pointed at src.
 */
U_CAPI int32_t U_EXPORT2
ucasemap_toTitle(UCaseMap *csm,
                 UChar *dest, int32_t destCapacity,
                 const UChar *src, int32_t srcLength,
                 UErrorCode *pErrorCode) {
    if(csm->iter==NULL) {
        csm->iter=ubrk_open(UBRK_WORD, csm->locale, src, srcLength, pErrorCode);
    } else {
        ubrk_setText(csm->iter, src, srcLength, pErrorCode);
    }
    const int32_t length=ustrcase_map(csm,
                                      dest, destCapacity,
                                      src, srcLength,
                                      ustrcase_internalToTitle, pErrorCode);
    return length;
}
#endif // !UCONFIG_NO_BREAK_ITERATION

View file

@ -19,7 +19,7 @@
*/
#include "unicode/utypes.h"
#include "unicode/uloc.h"
#include "unicode/brkiter.h"
#include "unicode/ustring.h"
#include "unicode/ucasemap.h"
#include "unicode/ubrk.h"
@ -29,9 +29,11 @@
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
U_NAMESPACE_USE
/* string casing ------------------------------------------------------------ */
/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */
static inline int32_t
appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
int32_t result, const UChar *s) {
@ -155,81 +157,31 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
return destIndex;
}
/*
 * Copies the initial language subtag of locale (up to 3 characters, ending at
 * NUL, '-' or '_') into csm->locale. A NULL locale means uloc_getDefault().
 * The error code parameter is unused.
 */
static void
setTempCaseMapLocale(UCaseMap *csm, const char *locale, UErrorCode * /*pErrorCode*/) {
    /*
     * We could call ucasemap_setLocale(), but here we really only care about
     * the initial language subtag, we need not return the real string via
     * ucasemap_getLocale(), and we don't care about only getting "x" from
     * "x-some-thing" etc.
     *
     * We ignore locales with a longer-than-3 initial subtag.
     *
     * We also do not fill in the locCache because it is rarely used,
     * and not worth setting unless we reuse it for many case mapping operations.
     * (That's why UCaseMap was created.)
     */

    /* the internal functions require locale!=NULL */
    const char *id=locale;
    if(id==NULL) {
        id=uloc_getDefault();
    }

    int length=0;
    while(length<4) {
        const char c=id[length];
        if(c==0 || c=='-' || c=='_') {
            break;
        }
        csm->locale[length]=c;
        ++length;
    }

    if(length>3) {
        csm->locale[0]=0;  /* Longer-than-3 initial subtag: Ignore. */
    } else {
        csm->locale[length]=0;  /* Up to 3 non-separator characters. */
    }
}
/*
 * Fills in a temporary, otherwise empty UCaseMap for API functions that take no UCaseMap.
 * Kept small and inline because it runs on every such call.
 */
static inline void
setTempCaseMap(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
    if(csm->csp==NULL) {
        csm->csp=ucase_getSingleton();
    }
    if(locale==NULL || locale[0]!=0) {
        // NULL or non-empty locale ID
        setTempCaseMapLocale(csm, locale, pErrorCode);
    } else {
        // explicit empty locale ID
        csm->locale[0]=0;
    }
}
#if !UCONFIG_NO_BREAK_ITERATION
/*
* Internal titlecasing function.
*/
static int32_t
_toTitle(UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, UCaseContext *csc,
int32_t srcLength,
UErrorCode *pErrorCode) {
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToTitle(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode) {
const UChar *s;
UChar32 c;
int32_t prev, titleStart, titleLimit, idx, destIndex, length;
UBool isFirstIndex;
if(csm->iter!=NULL) {
ubrk_setText(csm->iter, src, srcLength, pErrorCode);
} else {
csm->iter=ubrk_open(UBRK_WORD, csm->locale,
src, srcLength,
pErrorCode);
}
if(U_FAILURE(*pErrorCode)) {
return 0;
}
// Use the C++ abstract base class to minimize dependencies.
// TODO: Change UCaseMap.iter to store a BreakIterator directly.
BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter);
/* set up local variables */
int32_t locCache=csm->locCache;
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
destIndex=0;
prev=0;
isFirstIndex=TRUE;
@ -239,9 +191,9 @@ _toTitle(UCaseMap *csm,
/* find next index where to titlecase */
if(isFirstIndex) {
isFirstIndex=FALSE;
idx=ubrk_first(csm->iter);
idx=bi->first();
} else {
idx=ubrk_next(csm->iter);
idx=bi->next();
}
if(idx==UBRK_DONE || idx>srcLength) {
idx=srcLength;
@ -291,14 +243,14 @@ _toTitle(UCaseMap *csm,
if(titleStart<titleLimit) {
/* titlecase c which is from [titleStart..titleLimit[ */
csc->cpStart=titleStart;
csc->cpLimit=titleLimit;
c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &csm->locCache);
csc.cpStart=titleStart;
csc.cpLimit=titleLimit;
c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, &csc, &s, csm->locale, &locCache);
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
/* Special case Dutch IJ titlecasing */
if ( titleStart+1 < idx &&
ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LOC_DUTCH &&
ucase_getCaseLocale(csm->locale,&locCache) == UCASE_LOC_DUTCH &&
( src[titleStart] == (UChar32) 0x0049 || src[titleStart] == (UChar32) 0x0069 ) &&
( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) {
c=(UChar32) 0x004A;
@ -314,7 +266,7 @@ _toTitle(UCaseMap *csm,
_caseMap(
csm, ucase_toFullLower,
dest+destIndex, destCapacity-destIndex,
src, csc,
src, &csc,
titleLimit, idx,
pErrorCode);
} else {
@ -338,83 +290,41 @@ _toTitle(UCaseMap *csm,
return destIndex;
}
#endif
#endif // !UCONFIG_NO_BREAK_ITERATION
/* functions available in the common library (for unistr_case.cpp) */
U_CFUNC int32_t
ustr_toLower(const UCaseProps *csp,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode) {
UCaseMap csm=UCASEMAP_INITIALIZER;
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToLower(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode) {
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csm.csp=csp;
setTempCaseMap(&csm, locale, pErrorCode);
csc.p=(void *)src;
csc.limit=srcLength;
return _caseMap(&csm, ucase_toFullLower,
dest, destCapacity,
src, &csc, 0, srcLength,
pErrorCode);
return _caseMap(
csm, ucase_toFullLower,
dest, destCapacity,
src, &csc, 0, srcLength,
pErrorCode);
}
U_CFUNC int32_t
ustr_toUpper(const UCaseProps *csp,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode) {
UCaseMap csm=UCASEMAP_INITIALIZER;
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToUpper(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode) {
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csm.csp=csp;
setTempCaseMap(&csm, locale, pErrorCode);
csc.p=(void *)src;
csc.limit=srcLength;
return _caseMap(&csm, ucase_toFullUpper,
dest, destCapacity,
src, &csc, 0, srcLength,
pErrorCode);
return _caseMap(
csm, ucase_toFullUpper,
dest, destCapacity,
src, &csc, 0, srcLength,
pErrorCode);
}
#if !UCONFIG_NO_BREAK_ITERATION
U_CFUNC int32_t
ustr_toTitle(const UCaseProps *csp,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UBreakIterator *titleIter,
const char *locale, uint32_t options,
UErrorCode *pErrorCode) {
UCaseMap csm=UCASEMAP_INITIALIZER;
UCaseContext csc=UCASECONTEXT_INITIALIZER;
int32_t length;
csm.csp=csp;
csm.iter=titleIter;
csm.options=options;
setTempCaseMap(&csm, locale, pErrorCode);
csc.p=(void *)src;
csc.limit=srcLength;
length=_toTitle(&csm,
dest, destCapacity,
src, &csc, srcLength,
pErrorCode);
if(titleIter==NULL && csm.iter!=NULL) {
ubrk_close(csm.iter);
}
return length;
}
#endif
U_CFUNC int32_t
static int32_t
ustr_foldCase(const UCaseProps *csp,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
@ -444,26 +354,27 @@ ustr_foldCase(const UCaseProps *csp,
return destIndex;
}
/*
* Implement argument checking and buffer handling
* for string case mapping as a common function.
*/
/* UStringCaseMapper-shaped wrapper: case-folds src into dest with the UCaseMap's csp and options. */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalFold(const UCaseMap *csm,
                      UChar *dest, int32_t destCapacity,
                      const UChar *src, int32_t srcLength,
                      UErrorCode *pErrorCode) {
    const UCaseProps *caseProps=csm->csp;
    const uint32_t foldOptions=csm->options;
    return ustr_foldCase(caseProps, dest, destCapacity, src, srcLength, foldOptions, pErrorCode);
}
/* common internal function for public API functions */
static int32_t
caseMap(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
int32_t toWhichCase,
UErrorCode *pErrorCode) {
U_CFUNC int32_t
ustrcase_map(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UStringCaseMapper *stringCaseMapper,
UErrorCode *pErrorCode) {
UChar buffer[300];
UChar *temp;
int32_t destLength;
/* check argument values */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
if(U_FAILURE(*pErrorCode)) {
return 0;
}
if( destCapacity<0 ||
@ -501,40 +412,7 @@ caseMap(const UCaseMap *csm,
temp=dest;
}
destLength=0;
if(toWhichCase==FOLD_CASE) {
destLength=ustr_foldCase(csm->csp, temp, destCapacity, src, srcLength,
csm->options, pErrorCode);
} else {
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
if(toWhichCase==TO_LOWER) {
destLength=_caseMap(csm, ucase_toFullLower,
temp, destCapacity,
src, &csc,
0, srcLength,
pErrorCode);
} else if(toWhichCase==TO_UPPER) {
destLength=_caseMap(csm, ucase_toFullUpper,
temp, destCapacity,
src, &csc,
0, srcLength,
pErrorCode);
} else /* if(toWhichCase==TO_TITLE) */ {
#if UCONFIG_NO_BREAK_ITERATION
*pErrorCode=U_UNSUPPORTED_ERROR;
#else
/* UCaseMap is actually non-const in toTitle() APIs. */
destLength=_toTitle((UCaseMap *)csm, temp, destCapacity,
src, &csc, srcLength,
pErrorCode);
#endif
}
}
destLength=stringCaseMapper(csm, temp, destCapacity, src, srcLength, pErrorCode);
if(temp!=dest) {
/* copy the result string to the destination buffer */
if(destLength>0) {
@ -553,68 +431,6 @@ caseMap(const UCaseMap *csm,
/* public API functions */
/* Lowercases src into dest through a temporary UCaseMap set up for locale. */
U_CAPI int32_t U_EXPORT2
u_strToLower(UChar *dest, int32_t destCapacity,
             const UChar *src, int32_t srcLength,
             const char *locale,
             UErrorCode *pErrorCode) {
    UCaseMap tempMap=UCASEMAP_INITIALIZER;
    setTempCaseMap(&tempMap, locale, pErrorCode);
    const int32_t length=caseMap(&tempMap,
                                 dest, destCapacity,
                                 src, srcLength,
                                 TO_LOWER, pErrorCode);
    return length;
}
/* Uppercases src into dest through a temporary UCaseMap set up for locale. */
U_CAPI int32_t U_EXPORT2
u_strToUpper(UChar *dest, int32_t destCapacity,
             const UChar *src, int32_t srcLength,
             const char *locale,
             UErrorCode *pErrorCode) {
    UCaseMap tempMap=UCASEMAP_INITIALIZER;
    setTempCaseMap(&tempMap, locale, pErrorCode);
    const int32_t length=caseMap(&tempMap,
                                 dest, destCapacity,
                                 src, srcLength,
                                 TO_UPPER, pErrorCode);
    return length;
}
#if !UCONFIG_NO_BREAK_ITERATION
U_CAPI int32_t U_EXPORT2
u_strToTitle(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UBreakIterator *titleIter,
const char *locale,
UErrorCode *pErrorCode) {
UCaseMap csm=UCASEMAP_INITIALIZER;
int32_t length;
csm.iter=titleIter;
setTempCaseMap(&csm, locale, pErrorCode);
length=caseMap(&csm,
dest, destCapacity,
src, srcLength,
TO_TITLE, pErrorCode);
if(titleIter==NULL && csm.iter!=NULL) {
ubrk_close(csm.iter);
}
return length;
}
U_CAPI int32_t U_EXPORT2
ucasemap_toTitle(UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode) {
return caseMap(csm,
dest, destCapacity,
src, srcLength,
TO_TITLE, pErrorCode);
}
#endif
U_CAPI int32_t U_EXPORT2
u_strFoldCase(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
@ -623,10 +439,11 @@ u_strFoldCase(UChar *dest, int32_t destCapacity,
UCaseMap csm=UCASEMAP_INITIALIZER;
csm.csp=ucase_getSingleton();
csm.options=options;
return caseMap(&csm,
dest, destCapacity,
src, srcLength,
FOLD_CASE, pErrorCode);
return ustrcase_map(
&csm,
dest, destCapacity,
src, srcLength,
ustrcase_internalFold, pErrorCode);
}
/* case-insensitive string comparisons -------------------------------------- */

View file

@ -0,0 +1,110 @@
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: ustrcase_locale.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2011may31
* created by: Markus W. Scherer
*
* Locale-sensitive case mapping functions (ones that call uloc_getDefault())
* were moved here to break dependency cycles among parts of the common library.
*/
#include "unicode/utypes.h"
#include "unicode/ucasemap.h"
#include "unicode/uloc.h"
#include "unicode/ustring.h"
#include "ucase.h"
#include "ustr_imp.h"
U_CFUNC void
ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale) {
    /*
     * Store only the initial language subtag of the locale ID in csm->locale.
     *
     * ucasemap_setLocale() would also work, but case mapping only looks at
     * the leading language subtag: the full string need not be retrievable
     * through ucasemap_getLocale(), and getting just "x" out of
     * "x-some-thing" is acceptable.
     *
     * An initial subtag longer than 3 characters is ignored (empty locale).
     *
     * The locCache is deliberately left unset: it is rarely used and only
     * pays off when one UCaseMap is reused for many case mapping operations
     * (which is what a real UCaseMap is for).
     */
    if(locale==NULL) {
        // The internal functions require locale!=NULL.
        //
        // uprv_getDefaultLocaleID() is not used here: it does not see changes
        // made through uloc_setDefault(), and it does not cache the locale ID,
        // so frequent calls would be inefficient.
        //
        // uloc_getDefault() unfortunately pulls in many dependencies, although
        // only a small set of language subtags matters and the ID need not be
        // canonicalized.
        //
        // Callers do best to pass a non-NULL locale ID to case mapping functions.
        locale=uloc_getDefault();
    }

    // Copy at most 4 characters, stopping at the terminator or a subtag separator.
    int32_t subtagLength=0;
    while(subtagLength<4) {
        char ch=locale[subtagLength];
        if(ch==0 || ch=='-' || ch=='_') {
            break;
        }
        csm->locale[subtagLength++]=ch;
    }

    // Up to 3 non-separator characters: terminate right after them.
    // A 4th character means a longer-than-3 initial subtag: store an empty string.
    csm->locale[subtagLength<=3 ? subtagLength : 0]=0;
}
/*
 * Fill in the case properties and the locale of an otherwise empty UCaseMap,
 * for the API functions that do not take a UCaseMap.
 * Kept cheap because it runs on every such function call.
 */
static inline void
setTempCaseMap(UCaseMap *csm, const char *locale) {
    if(csm->csp==NULL) {
        csm->csp=ucase_getSingleton();
    }
    if(locale==NULL || locale[0]!=0) {
        /* NULL (default locale) or a non-empty ID: extract the language subtag. */
        ustrcase_setTempCaseMapLocale(csm, locale);
    } else {
        /* Explicit empty string: store an empty locale without further work. */
        csm->locale[0]=0;
    }
}
/* public API functions */
/*
 * Lowercase src into dest for the given locale.
 *
 * Sets up a temporary, stack-allocated UCaseMap via setTempCaseMap()
 * (locale may be NULL, in which case the default locale is looked up there)
 * and delegates the actual work, including argument checking and
 * error reporting through pErrorCode, to ustrcase_map() with
 * ustrcase_internalToLower as the string case mapper.
 *
 * Returns the value returned by ustrcase_map().
 */
U_CAPI int32_t U_EXPORT2
u_strToLower(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode) {
/* Temporary case map: lives only for the duration of this call. */
UCaseMap csm=UCASEMAP_INITIALIZER;
setTempCaseMap(&csm, locale);
return ustrcase_map(
&csm,
dest, destCapacity,
src, srcLength,
ustrcase_internalToLower, pErrorCode);
}
/*
 * Uppercase src into dest for the given locale.
 *
 * Sets up a temporary, stack-allocated UCaseMap via setTempCaseMap()
 * (locale may be NULL, in which case the default locale is looked up there)
 * and delegates the actual work, including argument checking and
 * error reporting through pErrorCode, to ustrcase_map() with
 * ustrcase_internalToUpper as the string case mapper.
 *
 * Returns the value returned by ustrcase_map().
 */
U_CAPI int32_t U_EXPORT2
u_strToUpper(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode) {
/* Temporary case map: lives only for the duration of this call. */
UCaseMap csm=UCASEMAP_INITIALIZER;
setTempCaseMap(&csm, locale);
return ustrcase_map(
&csm,
dest, destCapacity,
src, srcLength,
ustrcase_internalToUpper, pErrorCode);
}

View file

@ -1463,3 +1463,47 @@ u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCod
__TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
return length;
}
// Compute the hash code for a string -------------------------------------- ***
// Moved here from uhash.c so that UnicodeString::hashCode() does not depend
// on UHashtable code.
/*
  Compute the hash by iterating sparsely over about 32 (up to 63)
  characters spaced evenly through the string.  For each character,
  multiply the previous hash value by a prime number and add the new
  character in, like a linear congruential random number generator,
  producing a pseudorandom deterministic value well distributed over
  the output range. [LIU]

  Macro parameters:
    TYPE    element type through which the string is read
    STR     pointer to the first element; NULL yields a hash of 0
    STRLEN  number of elements
    DEREF   expression, written in terms of the cursor "p", that yields
            the value to mix in for the current element

  The macro expands to the complete body of a function returning int32_t
  (it ends in a return statement), so it must be the last thing in that
  function and be followed by a semicolon.

  The accumulator is unsigned on purpose: the repeated multiply-and-add
  exceeds the int32_t range after a handful of characters, and signed
  overflow is undefined behavior, whereas unsigned arithmetic wraps
  modulo 2^32. The result is converted to int32_t only once, on return.
*/
#define STRING_HASH(TYPE, STR, STRLEN, DEREF) \
    uint32_t hash = 0;                        \
    const TYPE *p = (const TYPE*) STR;        \
    if (p != NULL) {                          \
        int32_t len = (int32_t)(STRLEN);      \
        int32_t inc = ((len - 32) / 32) + 1;  \
        const TYPE *limit = p + len;          \
        while (p<limit) {                     \
            hash = (hash * 37) + (uint32_t)(DEREF); \
            p += inc;                         \
        }                                     \
    }                                         \
    return (int32_t)hash
/*
 * Used by UnicodeString to compute its hashcode - Not public API.
 *
 * Hashes the UChar string str of the given length with STRING_HASH,
 * mixing in each sampled code unit as is.
 * The STRING_HASH expansion supplies the whole function body, including
 * the return statement; a NULL str hashes to 0.
 */
U_CAPI int32_t U_EXPORT2
ustr_hashUCharsN(const UChar *str, int32_t length) {
STRING_HASH(UChar, str, length, *p);
}
/*
 * Hashes the char string str of the given length with STRING_HASH.
 * The bytes are read through a uint8_t pointer, so each sampled byte is
 * mixed in as an unsigned value regardless of the signedness of char.
 * A NULL str hashes to 0.
 */
U_CAPI int32_t U_EXPORT2
ustr_hashCharsN(const char *str, int32_t length) {
STRING_HASH(uint8_t, str, length, *p);
}
/*
 * Hashes the char string str of the given length with STRING_HASH,
 * passing each sampled char through uprv_tolower() and casting the result
 * to uint8_t before it is mixed in, so strings that differ only in what
 * uprv_tolower() maps together produce the same hash.
 * A NULL str hashes to 0.
 */
U_CAPI int32_t U_EXPORT2
ustr_hashICharsN(const char *str, int32_t length) {
STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p));
}

View file

@ -9,7 +9,6 @@
*/
#include "unicode/unimatch.h"
#include "unicode/uniset.h"
#include "patternprops.h"
#include "util.h"
@ -407,22 +406,3 @@ void ICU_Utility::appendToRule(UnicodeString& rule,
}
U_NAMESPACE_END
U_CAPI U_NAMESPACE_QUALIFIER UnicodeSet* U_EXPORT2
uprv_openPatternWhiteSpaceSet(UErrorCode* ec) {
if(U_FAILURE(*ec)) {
return NULL;
}
// create a set with the Pattern_White_Space characters,
// without a pattern string for fewer code dependencies
U_NAMESPACE_QUALIFIER UnicodeSet *set=new U_NAMESPACE_QUALIFIER UnicodeSet(9, 0xd);
// Check for new failure.
if (set == NULL) {
*ec = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
set->UnicodeSet::add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
return set;
}
//eof

View file

@ -23,7 +23,6 @@
U_NAMESPACE_BEGIN
class UnicodeMatcher;
class UnicodeSet;
class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ {
public:
@ -236,15 +235,5 @@ private:
U_NAMESPACE_END
/**
* Returns a new set with the Pattern_White_Space characters.
* The caller must close/delete the result.
* Stable set of characters, won't change.
* See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
* @internal
*/
U_CAPI U_NAMESPACE_QUALIFIER UnicodeSet* U_EXPORT2
uprv_openPatternWhiteSpaceSet(UErrorCode* ec);
#endif
//eof

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2010, International Business Machines
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: uts46.cpp
@ -22,6 +22,7 @@
#include "cmemory.h"
#include "cstring.h"
#include "punycode.h"
#include "ubidi_props.h"
#include "ustr_imp.h"
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
@ -1102,6 +1103,7 @@ isASCIIOkBiDi(const char *s, int32_t length) {
UBool
UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
const UBiDiProps *bdp=ubidi_getSingleton();
// [IDNA2008-Tables]
// 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
for(int32_t i=0; i<labelLength; ++i) {
@ -1123,7 +1125,7 @@ UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
}
// check precontext (Joining_Type:{L,D})(Joining_Type:T)*
for(;;) {
UJoiningType type=(UJoiningType)u_getIntPropertyValue(c, UCHAR_JOINING_TYPE);
UJoiningType type=ubidi_getJoiningType(bdp, c);
if(type==U_JT_TRANSPARENT) {
if(j==0) {
return FALSE;
@ -1141,7 +1143,7 @@ UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
return FALSE;
}
U16_NEXT_UNSAFE(label, j, c);
UJoiningType type=(UJoiningType)u_getIntPropertyValue(c, UCHAR_JOINING_TYPE);
UJoiningType type=ubidi_getJoiningType(bdp, c);
if(type==U_JT_TRANSPARENT) {
// just skip this character
} else if(type==U_JT_RIGHT_JOINING || type==U_JT_DUAL_JOINING) {

View file

@ -32,7 +32,7 @@
#include "uvector.h"
#include <string>
#include <iostream>
//#include <iostream>
U_NAMESPACE_BEGIN
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(AlphabeticIndex)
@ -47,15 +47,6 @@ sortCollateComparator(const void *context, const void *left, const void *right);
static int32_t U_CALLCONV
recordCompareFn(const void *context, const void *left, const void *right);
//
// UHash support function, delete a UnicodeSet
// TODO: move this function into uhash.
//
static void U_CALLCONV
uhash_deleteUnicodeSet(void *obj) {
delete static_cast<UnicodeSet *>(obj);
}
// UVector<Bucket *> support function, delete a Bucket.
static void U_CALLCONV
alphaIndex_deleteBucket(void *obj) {
@ -183,7 +174,7 @@ void AlphabeticIndex::buildIndex(UErrorCode &status) {
// that are the same according to the collator
UVector preferenceSorting(status); // Vector of UnicodeStrings; owned by the vector.
preferenceSorting.setDeleter(uhash_deleteUnicodeString);
preferenceSorting.setDeleter(uprv_deleteUObject);
appendUnicodeSetToUVector(preferenceSorting, *initialLabels_, status);
preferenceSorting.sortWithUComparator(PreferenceComparator, &status, status);
@ -236,7 +227,7 @@ void AlphabeticIndex::buildIndex(UErrorCode &status) {
const int32_t size = labelSet.size() - 1;
if (size > maxLabelCount_) {
UVector *newLabels = new UVector(status);
newLabels->setDeleter(uhash_deleteUnicodeString);
newLabels->setDeleter(uprv_deleteUObject);
int32_t count = 0;
int32_t old = -1;
for (int32_t srcIndex=0; srcIndex<labels_->size(); srcIndex++) {
@ -580,13 +571,13 @@ void AlphabeticIndex::init(UErrorCode &status) {
uhash_compareUnicodeString, // key Comparator,
NULL, // value Comparator
&status);
uhash_setKeyDeleter(alreadyIn_, uhash_deleteUnicodeString);
uhash_setValueDeleter(alreadyIn_, uhash_deleteUnicodeSet);
uhash_setKeyDeleter(alreadyIn_, uprv_deleteUObject);
uhash_setValueDeleter(alreadyIn_, uprv_deleteUObject);
bucketList_ = new UVector(status);
bucketList_->setDeleter(alphaIndex_deleteBucket);
labels_ = new UVector(status);
labels_->setDeleter(uhash_deleteUnicodeString);
labels_->setDeleter(uprv_deleteUObject);
labels_->setComparer(uhash_compareUnicodeString);
inputRecords_ = new UVector(status);
inputRecords_->setDeleter(alphaIndex_deleteRecord);
@ -839,7 +830,7 @@ UVector *AlphabeticIndex::firstStringsInScript(Collator *ruleBasedCollator, UErr
}
UVector *dest = new UVector(status);
dest->setDeleter(uhash_deleteUnicodeString);
dest->setDeleter(uprv_deleteUObject);
for (uint32_t i = 0; i < sizeof(results) / sizeof(results[0]); ++i) {
if (results[i].length() > 0) {
dest->addElement(results[i].clone(), status);
@ -876,7 +867,7 @@ UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
return NULL;
}
UVector *dest = new UVector(status);
dest->setDeleter(uhash_deleteUnicodeString);
dest->setDeleter(uprv_deleteUObject);
if (dest == NULL && U_SUCCESS(status)) {
status = U_MEMORY_ALLOCATION_ERROR;
}

View file

@ -437,7 +437,7 @@ protected:
} else {
ret->append((UChar)0x40); // '@' is a variant character
ret->append(UNICODE_STRING("calendar=", 9));
ret->append(UnicodeString(gCalTypes[getCalendarTypeForLocale(loc.getName())]));
ret->append(UnicodeString(gCalTypes[getCalendarTypeForLocale(loc.getName())], -1, US_INV));
}
return ret;
}

View file

@ -308,7 +308,7 @@ CurrencyPluralInfo::setupCurrencyPluralPattern(const Locale& loc, UErrorCode& st
std::cout << "pluralCount: " << pluralCount << "; pattern: " << result_1 << "\n";
#endif
fPluralCountToCurrencyUnitPattern->put(UnicodeString(pluralCount), pattern, status);
fPluralCountToCurrencyUnitPattern->put(UnicodeString(pluralCount, -1, US_INV), pattern, status);
}
}
}

View file

@ -1,7 +1,7 @@
/* ------------------------------------------------------------------ */
/* Decimal Context module */
/* ------------------------------------------------------------------ */
/* Copyright (c) IBM Corporation, 2000-2010. All rights reserved. */
/* Copyright (c) IBM Corporation, 2000-2011. All rights reserved. */
/* */
/* This software is made available under the terms of the */
/* ICU License -- ICU 1.8.1 and later. */
@ -25,10 +25,12 @@
#include "decContext.h" /* context and base types */
#include "decNumberLocal.h" /* decNumber local types, etc. */
#if 0 /* ICU: No need to test endianness at runtime. */
/* compile-time endian tester [assumes sizeof(Int)>1] */
static const Int mfcone=1; /* constant 1 */
static const Flag *mfctop=(Flag *)&mfcone; /* -> top byte */
#define LITEND *mfctop /* named flag; 1=little-endian */
#endif
/* ------------------------------------------------------------------ */
/* round-for-reround digits */
@ -210,7 +212,9 @@ U_CAPI decContext * U_EXPORT2 uprv_decContextSetRounding(decContext *context,
/* ------------------------------------------------------------------ */
U_CAPI decContext * U_EXPORT2 uprv_decContextSetStatus(decContext *context, uInt status) {
context->status|=status;
#if 0 /* ICU: Do not raise signals. */
if (status & context->traps) raise(SIGFPE);
#endif
return context;} /* decContextSetStatus */
/* ------------------------------------------------------------------ */
@ -374,6 +378,7 @@ U_CAPI const char * U_EXPORT2 uprv_decContextStatusToString(const decContext *co
/* */
/* No error is possible. */
/* ------------------------------------------------------------------ */
#if 0 /* ICU: Unused function. Anyway, do not call printf(). */
U_CAPI Int U_EXPORT2 uprv_decContextTestEndian(Flag quiet) {
Int res=0; /* optimist */
uInt dle=(uInt)DECLITEND; /* unsign */
@ -391,6 +396,7 @@ U_CAPI Int U_EXPORT2 uprv_decContextTestEndian(Flag quiet) {
}
return res;
} /* decContextTestEndian */
#endif
/* ------------------------------------------------------------------ */
/* decContextTestSavedStatus -- test bits in saved status */

View file

@ -433,7 +433,7 @@ DecimalFormat::construct(UErrorCode& status,
// For most locale, the patterns are probably the same for all
// plural count. If not, the right pattern need to be re-applied
// during format.
fCurrencyPluralInfo->getCurrencyPluralPattern("other", currencyPluralPatternForOther);
fCurrencyPluralInfo->getCurrencyPluralPattern(UNICODE_STRING("other", 5), currencyPluralPatternForOther);
patternUsed = &currencyPluralPatternForOther;
// TODO: not needed?
setCurrencyForSymbols();
@ -509,7 +509,7 @@ DecimalFormat::setupCurrencyAffixPatterns(UErrorCode& status) {
*fPosPrefixPattern,
*fPosSuffixPattern,
UCURR_SYMBOL_NAME);
fAffixPatternsForCurrency->put("default", affixPtn, status);
fAffixPatternsForCurrency->put(UNICODE_STRING("default", 7), affixPtn, status);
}
// save the unique currency plural patterns of this locale.
@ -556,14 +556,13 @@ DecimalFormat::setupCurrencyAffixes(const UnicodeString& pattern,
const PluralRules* pluralRules = fCurrencyPluralInfo->getPluralRules();
StringEnumeration* keywords = pluralRules->getKeywords(status);
if (U_SUCCESS(status)) {
const char* pluralCountCh;
while ((pluralCountCh = keywords->next(NULL, status)) != NULL) {
const UnicodeString* pluralCount;
while ((pluralCount = keywords->snext(status)) != NULL) {
if ( U_SUCCESS(status) ) {
UnicodeString pluralCount = UnicodeString(pluralCountCh);
expandAffixAdjustWidth(&pluralCount);
expandAffixAdjustWidth(pluralCount);
AffixesForCurrency* affix = new AffixesForCurrency(
fNegativePrefix, fNegativeSuffix, fPositivePrefix, fPositiveSuffix);
fAffixesForCurrency->put(pluralCount, affix, status);
fAffixesForCurrency->put(*pluralCount, affix, status);
}
}
}
@ -584,16 +583,15 @@ DecimalFormat::setupCurrencyAffixes(const UnicodeString& pattern,
const PluralRules* pluralRules = fCurrencyPluralInfo->getPluralRules();
StringEnumeration* keywords = pluralRules->getKeywords(status);
if (U_SUCCESS(status)) {
const char* pluralCountCh;
while ((pluralCountCh = keywords->next(NULL, status)) != NULL) {
const UnicodeString* pluralCount;
while ((pluralCount = keywords->snext(status)) != NULL) {
if ( U_SUCCESS(status) ) {
UnicodeString pluralCount = UnicodeString(pluralCountCh);
UnicodeString ptn;
fCurrencyPluralInfo->getCurrencyPluralPattern(pluralCount, ptn);
applyPatternInternally(pluralCount, ptn, false, parseErr, status);
fCurrencyPluralInfo->getCurrencyPluralPattern(*pluralCount, ptn);
applyPatternInternally(*pluralCount, ptn, false, parseErr, status);
AffixesForCurrency* affix = new AffixesForCurrency(
fNegativePrefix, fNegativeSuffix, fPositivePrefix, fPositiveSuffix);
fPluralAffixesForCurrency->put(pluralCount, affix, status);
fPluralAffixesForCurrency->put(*pluralCount, affix, status);
}
}
}
@ -3281,17 +3279,13 @@ void DecimalFormat::expandAffix(const UnicodeString& pattern,
// For other cases, pluralCount == null,
// and plural names are not needed.
int32_t len;
// TODO: num of char in plural count
char pluralCountChar[10];
if (pluralCount->length() >= 10) {
break;
}
pluralCount->extract(0, pluralCount->length(), pluralCountChar);
CharString pluralCountChar;
pluralCountChar.appendInvariantChars(*pluralCount, ec);
UBool isChoiceFormat;
const UChar* s = ucurr_getPluralName(currencyUChars,
fSymbols != NULL ? fSymbols->getLocale().getName() :
Locale::getDefault().getName(), &isChoiceFormat,
pluralCountChar, &len, &ec);
pluralCountChar.data(), &len, &ec);
affix += UnicodeString(s, len);
handler.addAttribute(kCurrencyField, beginIdx, affix.length());
} else if(intl) {

View file

@ -1349,7 +1349,8 @@ DateIntervalFormat::adjustFieldWidth(const UnicodeString& inputSkeleton,
DateIntervalInfo::parseSkeleton(inputSkeleton, inputSkeletonFieldWidth);
DateIntervalInfo::parseSkeleton(bestMatchSkeleton, bestMatchSkeletonFieldWidth);
if ( differenceInfo == 2 ) {
adjustedPtn.findAndReplace("v", "z");
adjustedPtn.findAndReplace(UnicodeString((UChar)0x76 /* v */),
UnicodeString((UChar)0x7a /* z */));
}
UBool inQuote = false;

View file

@ -267,42 +267,39 @@ DateIntervalInfo::initializeData(const Locale& locale, UErrorCode& err)
int32_t size = ures_getSize(itvDtPtnResource);
int32_t index;
for ( index = 0; index < size; ++index ) {
UResourceBundle* oneRes = ures_getByIndex(itvDtPtnResource, index,
NULL, &status);
LocalUResourceBundlePointer oneRes(ures_getByIndex(itvDtPtnResource, index,
NULL, &status));
if ( U_SUCCESS(status) ) {
const char* skeleton = ures_getKey(oneRes);
if ( skeleton == NULL ||
skeletonSet.geti(UnicodeString(skeleton)) == 1 ) {
ures_close(oneRes);
const char* skeleton = ures_getKey(oneRes.getAlias());
if (skeleton == NULL) {
continue;
}
skeletonSet.puti(UnicodeString(skeleton), 1, status);
UnicodeString skeletonUniStr(skeleton, -1, US_INV);
if ( skeletonSet.geti(skeletonUniStr) == 1 ) {
continue;
}
skeletonSet.puti(skeletonUniStr, 1, status);
if ( uprv_strcmp(skeleton, gFallbackPatternTag) == 0 ) {
ures_close(oneRes);
continue; // fallback
}
UResourceBundle* intervalPatterns = ures_getByKey(
itvDtPtnResource, skeleton, NULL, &status);
LocalUResourceBundlePointer intervalPatterns(ures_getByKey(
itvDtPtnResource, skeleton, NULL, &status));
if ( U_FAILURE(status) ) {
ures_close(intervalPatterns);
ures_close(oneRes);
break;
}
if ( intervalPatterns == NULL ) {
ures_close(intervalPatterns);
ures_close(oneRes);
continue;
}
const UChar* pattern;
const char* key;
int32_t ptLength;
int32_t ptnNum = ures_getSize(intervalPatterns);
int32_t ptnNum = ures_getSize(intervalPatterns.getAlias());
int32_t ptnIndex;
for ( ptnIndex = 0; ptnIndex < ptnNum; ++ptnIndex ) {
pattern = ures_getNextString(intervalPatterns, &ptLength, &key,
pattern = ures_getNextString(intervalPatterns.getAlias(), &ptLength, &key,
&status);
if ( U_FAILURE(status) ) {
break;
@ -323,12 +320,10 @@ DateIntervalInfo::initializeData(const Locale& locale, UErrorCode& err)
calendarField = UCAL_MINUTE;
}
if ( calendarField != UCAL_FIELD_COUNT ) {
setIntervalPatternInternally(skeleton, calendarField, pattern,status);
setIntervalPatternInternally(skeletonUniStr, calendarField, pattern,status);
}
}
ures_close(intervalPatterns);
}
ures_close(oneRes);
}
}
ures_close(itvDtPtnResource);

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
* Copyright (C) 2010, International Business Machines Corporation and *
* others. All Rights Reserved. *
* Copyright (C) 2010-2011, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
@ -418,7 +418,7 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& locale,
while ((key = e->next((int32_t *)0, status)) != NULL) {
locale.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status);
appendWithSep(resultRemainder, keyDisplayName(key, temp))
.append("=")
.append((UChar)0x3d /* = */)
.append(keyValueDisplayName(key, value, temp2));
}
delete e;

View file

@ -539,7 +539,7 @@ void MessageFormat::setArgStartFormat(int32_t argStart,
delete formatter;
return;
}
uhash_setValueDeleter(cachedFormatters, uhash_deleteUObject);
uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject);
}
if (formatter == NULL) {
formatter = new DummyFormat();
@ -841,7 +841,7 @@ MessageFormat::getFormatNames(UErrorCode& status) {
status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
fFormatNames->setDeleter(uhash_deleteUObject);
fFormatNames->setDeleter(uprv_deleteUObject);
for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
fFormatNames->addElement(new UnicodeString(getArgName(partIndex)), status);
@ -1201,7 +1201,7 @@ void MessageFormat::copyObjects(const MessageFormat& that, UErrorCode& ec) {
if (U_FAILURE(ec)) {
return;
}
uhash_setValueDeleter(cachedFormatters, uhash_deleteUObject);
uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject);
}
const int32_t count = uhash_count(that.cachedFormatters);

View file

@ -25,7 +25,6 @@
#include "plurrule_impl.h"
#include "putilimp.h"
#include "ucln_in.h"
#include "uhash.h"
#include "ustrfmt.h"
#include "locutil.h"
@ -1381,7 +1380,7 @@ PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode
if (U_FAILURE(status)) {
return;
}
fKeywordNames.setDeleter(uhash_deleteUObject);
fKeywordNames.setDeleter(uprv_deleteUObject);
UBool addKeywordOther=TRUE;
RuleChain *node=header;
while(node!=NULL) {

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 1999-2008, International Business Machines
* Copyright (C) 1999-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
@ -28,7 +28,7 @@ TransliterationRuleData::TransliterationRuleData(UErrorCode& status)
if (U_FAILURE(status)) {
return;
}
variableNames.setValueDeleter(uhash_deleteUnicodeString);
variableNames.setValueDeleter(uprv_deleteUObject);
variables = 0;
variablesLength = 0;
}
@ -41,7 +41,7 @@ TransliterationRuleData::TransliterationRuleData(const TransliterationRuleData&
{
UErrorCode status = U_ZERO_ERROR;
int32_t i = 0;
variableNames.setValueDeleter(uhash_deleteUnicodeString);
variableNames.setValueDeleter(uprv_deleteUObject);
int32_t pos = -1;
const UHashElement *e;
while ((e = other.variableNames.nextElement(pos)) != 0) {

View file

@ -825,11 +825,11 @@ idBlockVector(statusReturn),
variablesVector(statusReturn),
segmentObjects(statusReturn)
{
idBlockVector.setDeleter(uhash_deleteUnicodeString);
idBlockVector.setDeleter(uprv_deleteUObject);
curData = NULL;
compoundFilter = NULL;
parseData = NULL;
variableNames.setValueDeleter(uhash_deleteUnicodeString);
variableNames.setValueDeleter(uprv_deleteUObject);
}
/**

View file

@ -3058,7 +3058,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
// next step. Otherwise, all time zone names starting with GMT/UT/UTC
// (for example, "UTT") will fail.
if (gmtLen > 0 && ((text.length() - start) == gmtLen)) {
TimeZone *tz = TimeZone::createTimeZone(UnicodeString("Etc/GMT"));
TimeZone *tz = TimeZone::createTimeZone(UNICODE_STRING("Etc/GMT", 7));
cal.adoptTimeZone(tz);
return start + gmtLen;
}
@ -3115,7 +3115,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
// Step 5
// If we saw standalone GMT zero pattern, then use GMT.
if (gmtLen > 0) {
TimeZone *tz = TimeZone::createTimeZone(UnicodeString("Etc/GMT"));
TimeZone *tz = TimeZone::createTimeZone(UNICODE_STRING("Etc/GMT", 7));
cal.adoptTimeZone(tz);
return start + gmtLen;
}

View file

@ -33,27 +33,27 @@ SimpleDateFormatStaticSets::SimpleDateFormatStaticSets(UErrorCode *status)
fTimeIgnorables(NULL),
fOtherIgnorables(NULL)
{
fDateIgnorables = new UnicodeSet("[-,./[:whitespace:]]", *status);
fTimeIgnorables = new UnicodeSet("[-.:[:whitespace:]]", *status);
fOtherIgnorables = new UnicodeSet("[:whitespace:]", *status);
fDateIgnorables = new UnicodeSet(UNICODE_STRING("[-,./[:whitespace:]]", 20), *status);
fTimeIgnorables = new UnicodeSet(UNICODE_STRING("[-.:[:whitespace:]]", 19), *status);
fOtherIgnorables = new UnicodeSet(UNICODE_STRING("[:whitespace:]", 14), *status);
// Check for null pointers
if (fDateIgnorables == NULL || fTimeIgnorables == NULL || fOtherIgnorables == NULL) {
goto ExitConstrDeleteAll;
}
// Freeze all the sets
fDateIgnorables->freeze();
fTimeIgnorables->freeze();
fOtherIgnorables->freeze();
return; // If we reached this point, everything is fine so just exit
ExitConstrDeleteAll: // Remove all sets and return error
delete fDateIgnorables; fDateIgnorables = NULL;
delete fTimeIgnorables; fTimeIgnorables = NULL;
delete fOtherIgnorables; fOtherIgnorables = NULL;
*status = U_MEMORY_ALLOCATION_ERROR;
}

View file

@ -70,6 +70,7 @@
#include "cmemory.h"
#include "cstring.h"
#include "putilimp.h"
#include "ustr_imp.h"
/* public RuleBasedCollator constructor ---------------------------------- */
@ -644,7 +645,7 @@ int32_t RuleBasedCollator::hashCode() const
{
int32_t length;
const UChar *rules = ucol_getRules(ucollator, &length);
return uhash_hashUCharsN(rules, length);
return ustr_hashUCharsN(rules, length);
}
/**

View file

@ -11,6 +11,7 @@
#if !UCONFIG_NO_FORMATTING
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "hash.h"
@ -495,12 +496,13 @@ TimeUnitFormat::readFromCurrentLocale(UTimeUnitFormatStyle style, const char* ke
if (fNumberFormat != NULL) {
messageFormat->setFormat(0, *fNumberFormat);
}
MessageFormat** formatters = (MessageFormat**)countToPatterns->get(pluralCount);
UnicodeString pluralCountUniStr(pluralCount, -1, US_INV);
MessageFormat** formatters = (MessageFormat**)countToPatterns->get(pluralCountUniStr);
if (formatters == NULL) {
formatters = (MessageFormat**)uprv_malloc(UTMUTFMT_FORMAT_STYLE_COUNT*sizeof(MessageFormat*));
formatters[UTMUTFMT_FULL_STYLE] = NULL;
formatters[UTMUTFMT_ABBREVIATED_STYLE] = NULL;
countToPatterns->put(pluralCount, formatters, err);
countToPatterns->put(pluralCountUniStr, formatters, err);
if (U_FAILURE(err)) {
uprv_free(formatters);
}
@ -557,8 +559,8 @@ TimeUnitFormat::checkConsistency(UTimeUnitFormatStyle style, const char* key, UE
//
StringEnumeration* keywords = fPluralRules->getKeywords(err);
if (U_SUCCESS(err)) {
const char* pluralCount;
while ((pluralCount = keywords->next(NULL, err)) != NULL) {
const UnicodeString* pluralCount;
while ((pluralCount = keywords->snext(err)) != NULL) {
if ( U_SUCCESS(err) ) {
for (int32_t i = 0; i < TimeUnit::UTIMEUNIT_FIELD_COUNT; ++i) {
// for each time unit,
@ -572,13 +574,15 @@ TimeUnitFormat::checkConsistency(UTimeUnitFormatStyle style, const char* key, UE
}
fTimeUnitToCountToPatterns[i] = countToPatterns;
}
MessageFormat** formatters = (MessageFormat**)countToPatterns->get(pluralCount);
MessageFormat** formatters = (MessageFormat**)countToPatterns->get(*pluralCount);
if( formatters == NULL || formatters[style] == NULL ) {
// look through parents
const char* localeName = fLocale.getName();
CharString pluralCountChars;
pluralCountChars.appendInvariantChars(*pluralCount, err);
searchInLocaleChain(style, key, localeName,
(TimeUnit::UTimeUnitFields)i,
pluralCount, pluralCount,
*pluralCount, pluralCountChars.data(),
countToPatterns, err);
}
}
@ -601,7 +605,7 @@ TimeUnitFormat::checkConsistency(UTimeUnitFormatStyle style, const char* key, UE
void
TimeUnitFormat::searchInLocaleChain(UTimeUnitFormatStyle style, const char* key, const char* localeName,
TimeUnit::UTimeUnitFields srcTimeUnitField,
const char* srcPluralCount,
const UnicodeString& srcPluralCount,
const char* searchPluralCount,
Hashtable* countToPatterns,
UErrorCode& err) {

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (c) 2001-2010, International Business Machines
* Copyright (c) 2001-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
@ -517,7 +517,7 @@ TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) :
availableIDs(status)
{
registry.setValueDeleter(deleteEntry);
availableIDs.setDeleter(uhash_deleteUnicodeString);
availableIDs.setDeleter(uprv_deleteUObject);
availableIDs.setComparer(uhash_compareCaselessUnicodeString);
specDAG.setValueDeleter(uhash_deleteHashtable);
}
@ -936,12 +936,12 @@ void TransliteratorRegistry::registerSTV(const UnicodeString& source,
if (U_FAILURE(status) || targets == 0) {
return;
}
targets->setValueDeleter(uhash_deleteUObject);
targets->setValueDeleter(uprv_deleteUObject);
specDAG.put(source, targets, status);
}
UVector *variants = (UVector*) targets->get(target);
if (variants == 0) {
variants = new UVector(uhash_deleteUnicodeString,
variants = new UVector(uprv_deleteUObject,
uhash_compareCaselessUnicodeString, status);
if (variants == 0) {
return;

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (c) 2002-2009, International Business Machines Corporation
* Copyright (c) 2002-2011, International Business Machines Corporation
* and others. All Rights Reserved.
**********************************************************************
* Date Name Description
@ -907,7 +907,7 @@ void TransliteratorIDParser::init(UErrorCode &status) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
special_inverses->setValueDeleter(uhash_deleteUnicodeString);
special_inverses->setValueDeleter(uprv_deleteUObject);
umtx_lock(&LOCK);
if (SPECIAL_INVERSES == NULL) {

View file

@ -369,7 +369,7 @@ TimeZoneFormatDelegate::TimeZoneFormatDelegate(const Locale& locale, UErrorCode&
if (!gTimeZoneFormatCacheInitialized) {
gTimeZoneFormatCache = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
if (U_SUCCESS(status)) {
uhash_setKeyDeleter(gTimeZoneFormatCache, uhash_freeBlock);
uhash_setKeyDeleter(gTimeZoneFormatCache, uprv_free);
uhash_setValueDeleter(gTimeZoneFormatCache, deleteTimeZoneFormatCacheEntry);
gTimeZoneFormatCacheInitialized = TRUE;
ucln_i18n_registerCleanup(UCLN_I18N_TIMEZONEFORMAT, timeZoneFormat_cleanup);

View file

@ -69,7 +69,7 @@ hashPartialLocationKey(const UHashTok key) {
.append(p->mzID)
.append((UChar)0x23)
.append((UChar)(p->isLong ? 0x4C : 0x53));
return uhash_hashUCharsN(str.getBuffer(), str.length());
return str.hashCode();
}
/**
@ -209,7 +209,7 @@ GNameSearchHandler::handleMatch(int32_t matchLength, const CharacterNode *node,
if ((nameinfo->type & fTypes) != 0) {
// matches a requested type
if (fResults == NULL) {
fResults = new UVector(uhash_freeBlock, NULL, status);
fResults = new UVector(uprv_free, NULL, status);
if (fResults == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
}
@ -350,7 +350,7 @@ TimeZoneGenericNames::initialize(const Locale& locale, UErrorCode& status) {
cleanup();
return;
}
uhash_setKeyDeleter(fPartialLocationNamesMap, uhash_freeBlock);
uhash_setKeyDeleter(fPartialLocationNamesMap, uprv_free);
// no value deleter
// target region

View file

@ -132,7 +132,7 @@ TimeZoneNamesDelegate::TimeZoneNamesDelegate(const Locale& locale, UErrorCode& s
if (!gTimeZoneNamesCacheInitialized) {
gTimeZoneNamesCache = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
if (U_SUCCESS(status)) {
uhash_setKeyDeleter(gTimeZoneNamesCache, uhash_freeBlock);
uhash_setKeyDeleter(gTimeZoneNamesCache, uprv_free);
uhash_setValueDeleter(gTimeZoneNamesCache, deleteTimeZoneNamesCacheEntry);
gTimeZoneNamesCacheInitialized = TRUE;
ucln_i18n_registerCleanup(UCLN_I18N_TIMEZONENAMES, timeZoneNames_cleanup);
@ -277,7 +277,8 @@ TimeZoneNames::getExemplarLocationName(const UnicodeString& tzID, UnicodeString&
int32_t sep = tzID.lastIndexOf((UChar)0x2F /* '/' */);
if (sep > 0 && sep + 1 < tzID.length()) {
name.setTo(tzID, sep + 1);
name.findAndReplace("_", " ");
name.findAndReplace(UnicodeString((UChar)0x5f /* _ */),
UnicodeString((UChar)0x20 /* space */));
} else {
name.setToBogus();
}

View file

@ -866,7 +866,7 @@ ZNameSearchHandler::handleMatch(int32_t matchLength, const CharacterNode *node,
if ((nameinfo->type & fTypes) != 0) {
// matches a requested type
if (fResults == NULL) {
fResults = new UVector(uhash_freeBlock, NULL, status);
fResults = new UVector(uprv_free, NULL, status);
if (fResults == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
}
@ -1090,7 +1090,7 @@ TimeZoneNamesImpl::getMetaZoneID(const UnicodeString& tzID, UDate date, UnicodeS
UnicodeString&
TimeZoneNamesImpl::getReferenceZoneID(const UnicodeString& mzID, const char* region, UnicodeString& tzID) const {
ZoneMeta::getZoneIdByMetazone(mzID, UnicodeString(region), tzID);
ZoneMeta::getZoneIdByMetazone(mzID, UnicodeString(region, -1, US_INV), tzID);
return tzID;
}

View file

@ -1397,7 +1397,7 @@ static const char* ReorderingTokenNames[] = {
static void toUpper(const char* src, char* dst, uint32_t length) {
for (uint32_t i = 0; *src != '\0' && i < length - 1; ++src, ++dst, ++i) {
*dst = toupper(*src);
*dst = uprv_toupper(*src);
}
*dst = '\0';
}

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2001-2010, International Business Machines
* Copyright (C) 2001-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -157,7 +157,7 @@ uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollat
if (U_FAILURE(*status)) {
goto allocation_failure;
}
uhash_setValueDeleter(t->prefixLookup, uhash_freeBlock);
uhash_setValueDeleter(t->prefixLookup, uprv_free);
t->contractions = uprv_cnttab_open(t->mapping, status);
if (U_FAILURE(*status)) {

View file

@ -135,12 +135,6 @@ U_CDECL_END
#endif
/*static inline void U_CALLCONV
uhash_freeBlockWrapper(void *obj) {
uhash_freeBlock(obj);
}*/
typedef struct {
uint32_t startCE;
uint32_t startContCE;
@ -2367,7 +2361,7 @@ void ucol_tok_initTokenList(
if(U_FAILURE(*status)) {
return;
}
uhash_setValueDeleter(src->tailored, uhash_freeBlock);
uhash_setValueDeleter(src->tailored, uprv_free);
src->opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
/* test for NULL */

View file

@ -243,7 +243,7 @@ private:
// fill in fTimeUnitToCountToPatterns from locale fall-back chain
void searchInLocaleChain(UTimeUnitFormatStyle style, const char* key, const char* localeName,
TimeUnit::UTimeUnitFields field, const char*,
TimeUnit::UTimeUnitFields field, const UnicodeString&,
const char*, Hashtable*, UErrorCode&);
// initialize hash table

View file

@ -233,19 +233,21 @@ void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesL
// Capture Group 8: A syntactically invalid line. Anything that didn't match before.
// Example Line from the confusables.txt source file:
// "1D702 ; 006E 0329 ; SL # MATHEMATICAL ITALIC SMALL ETA ... "
fParseLine = uregex_openC(
UnicodeString pattern(
"(?m)^[ \\t]*([0-9A-Fa-f]+)[ \\t]+;" // Match the source char
"[ \\t]*([0-9A-Fa-f]+" // Match the replacement char(s)
"(?:[ \\t]+[0-9A-Fa-f]+)*)[ \\t]*;" // (continued)
"\\s*(?:(SL)|(SA)|(ML)|(MA))" // Match the table type
"[ \\t]*(?:#.*?)?$" // Match any trailing #comment
"|^([ \\t]*(?:#.*?)?)$" // OR match empty lines or lines with only a #comment
"|^(.*?)$", // OR match any line, which catches illegal lines.
0, NULL, &status);
"|^(.*?)$", -1, US_INV); // OR match any line, which catches illegal lines.
// TODO: Why are we using the regex C API here? C++ would just take UnicodeString...
fParseLine = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status);
// Regular expression for parsing a hex number out of a space-separated list of them.
// Capture group 1 gets the number, with spaces removed.
fParseHexNum = uregex_openC("\\s*([0-9A-F]+)", 0, NULL, &status);
pattern = UNICODE_STRING_SIMPLE("\\s*([0-9A-F]+)");
fParseHexNum = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status);
// Zap any Byte Order Mark at the start of input. Changing it to a space is benign
// given the syntax of the input.

View file

@ -52,7 +52,6 @@ U_NAMESPACE_USE
// The expression will match _all_ lines, including erroneous lines.
// The result of the parse is returned via the contents of the (match) groups.
static const char *parseExp =
"(?m)" // Multi-line mode
"^([ \\t]*(?:#.*?)?)$" // A blank or comment line. Matches Group 1.
"|^(?:" // OR
@ -115,7 +114,8 @@ void buildWSConfusableData(SpoofImpl *spImpl, const char * confusablesWS,
anyCaseTrie = utrie2_open(0, 0, &status);
lowerCaseTrie = utrie2_open(0, 0, &status);
UnicodeString pattern(parseExp, -1, US_INV);
// The scriptSets vector provides a mapping from TRIE values to the set of scripts.
//
@ -150,10 +150,8 @@ void buildWSConfusableData(SpoofImpl *spImpl, const char * confusablesWS,
}
u_strFromUTF8(input, inputLen+1, NULL, confusablesWS, confusablesWSLen, &status);
parseRegexp = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status);
parseRegexp = uregex_openC(parseExp, 0, NULL, &status);
// Zap any Byte Order Mark at the start of input. Changing it to a space is benign
// given the syntax of the input.
if (*input == 0xfeff) {

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2007-2010, International Business Machines Corporation and
* Copyright (C) 2007-2011, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
@ -18,7 +18,6 @@
#include "cmemory.h"
#include "uvector.h"
#include "gregoimp.h"
#include "uhash.h"
U_NAMESPACE_BEGIN
@ -962,7 +961,7 @@ VTimeZone::VTimeZone(const VTimeZone& source)
if (source.vtzlines != NULL) {
UErrorCode status = U_ZERO_ERROR;
int32_t size = source.vtzlines->size();
vtzlines = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, size, status);
vtzlines = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, size, status);
if (U_SUCCESS(status)) {
for (int32_t i = 0; i < size; i++) {
UnicodeString *line = (UnicodeString*)source.vtzlines->elementAt(i);
@ -1007,7 +1006,7 @@ VTimeZone::operator=(const VTimeZone& right) {
if (right.vtzlines != NULL) {
UErrorCode status = U_ZERO_ERROR;
int32_t size = right.vtzlines->size();
vtzlines = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, size, status);
vtzlines = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, size, status);
if (U_SUCCESS(status)) {
for (int32_t i = 0; i < size; i++) {
UnicodeString *line = (UnicodeString*)right.vtzlines->elementAt(i);
@ -1242,7 +1241,7 @@ VTimeZone::getTimeZoneRules(const InitialTimeZoneRule*& initial,
void
VTimeZone::load(VTZReader& reader, UErrorCode& status) {
vtzlines = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, DEFAULT_VTIMEZONE_LINES, status);
vtzlines = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, DEFAULT_VTIMEZONE_LINES, status);
if (U_FAILURE(status)) {
return;
}
@ -1378,7 +1377,7 @@ VTimeZone::parse(UErrorCode& status) {
// Set the deleter to remove TimeZoneRule vectors to avoid memory leaks due to unowned TimeZoneRules.
rules->setDeleter(deleteTimeZoneRule);
dates = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, status);
dates = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
if (U_FAILURE(status)) {
goto cleanupParse;
}
@ -1741,7 +1740,7 @@ VTimeZone::write(VTZWriter& writer, UErrorCode& status) const {
} else {
UVector *customProps = NULL;
if (olsonzid.length() > 0 && icutzver.length() > 0) {
customProps = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, status);
customProps = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
if (U_FAILURE(status)) {
return;
}
@ -1769,7 +1768,7 @@ VTimeZone::write(UDate start, VTZWriter& writer, UErrorCode& status) /*const*/ {
}
InitialTimeZoneRule *initial = NULL;
UVector *transitionRules = NULL;
UVector customProps(uhash_deleteUnicodeString, uhash_compareUnicodeString, status);
UVector customProps(uprv_deleteUObject, uhash_compareUnicodeString, status);
UnicodeString tzid;
// Extract rules applicable to dates after the start time
@ -1833,7 +1832,7 @@ VTimeZone::writeSimple(UDate time, VTZWriter& writer, UErrorCode& status) /*cons
return;
}
UVector customProps(uhash_deleteUnicodeString, uhash_compareUnicodeString, status);
UVector customProps(uprv_deleteUObject, uhash_compareUnicodeString, status);
UnicodeString tzid;
// Extract simple rules

View file

@ -763,7 +763,7 @@ ZoneMeta::initAvailableMetaZoneIDs () {
if (!gMetaZoneIDsInitialized) {
UErrorCode status = U_ZERO_ERROR;
UHashtable *metaZoneIDTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);
uhash_setKeyDeleter(metaZoneIDTable, uhash_deleteUnicodeString);
uhash_setKeyDeleter(metaZoneIDTable, uprv_deleteUObject);
// No valueDeleter, because the vector maintains the value objects
UVector *metaZoneIDs = NULL;
if (U_SUCCESS(status)) {
@ -775,7 +775,7 @@ ZoneMeta::initAvailableMetaZoneIDs () {
uhash_close(metaZoneIDTable);
}
if (U_SUCCESS(status)) {
metaZoneIDs->setDeleter(uhash_freeBlock);
metaZoneIDs->setDeleter(uprv_free);
UResourceBundle *rb = ures_openDirect(NULL, gMetaZones, &status);
UResourceBundle *bundle = ures_getByKey(rb, gMapTimezonesTag, NULL, &status);

View file

@ -0,0 +1,194 @@
#! /usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2011, International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: dependencies.py
#
# created on: 2011may26
"""Reader module for dependency data for the ICU dependency tester.
Reads dependencies.txt and makes the data available.
Attributes:
files: Set of "library/filename.o" files mentioned in the dependencies file.
items: Map from library or group names to item maps.
Each item has a "type" ("library" or "group" or "system_symbols").
A library or group item can have an optional set of "files" (as in the files attribute).
Each item can have an optional set of "deps" (libraries & groups).
A group item also has a "library" name unless it is a group of system symbols.
The one "system_symbols" item and its groups have sets of "system_symbols"
with standard-library system symbol names.
libraries: Set of library names mentioned in the dependencies file.
"""
__author__ = "Markus W. Scherer"
# TODO: Support binary items.
# .txt syntax: binary: tools/genrb
# item contents: {"type": "binary"} with optional files & deps
# A binary must not be used as a dependency for anything else.
import sys
files = set()
items = {}
libraries = set()
_line_number = 0
_groups_to_be_defined = set()
def _CheckLibraryName(name):
global _line_number
if not name:
sys.exit("Error:%d: \"library: \" without name" % _line_number)
if name.endswith(".o"):
sys.exit("Error:%d: invalid library name %s" % (_line_number, name))
def _CheckGroupName(name):
global _line_number
if not name:
sys.exit("Error:%d: \"group: \" without name" % _line_number)
if "/" in name or name.endswith(".o"):
sys.exit("Error:%d: invalid group name %s" % (_line_number, name))
def _CheckFileName(name):
global _line_number
if "/" in name or not name.endswith(".o"):
sys.exit("Error:%d: invalid file name %s" % (_line_number, name))
def _RemoveComment(line):
  """Strips a trailing "#" comment and trailing whitespace from one raw line.

  Every call also advances the module-level _line_number by one, so this
  must be called exactly once per line read from the file.

  Args:
    line: One raw line of input.

  Returns:
    The text before the first "#" (or the whole line if there is none),
    without trailing whitespace. May be the empty string.
  """
  global _line_number
  _line_number += 1
  # Keep only what precedes the first "#", if any.
  content = line.partition("#")[0]
  # Remove trailing newlines etc.
  return content.rstrip()
def _ReadLine(f):
  """Returns the next line of f that still has content after comment removal.

  Uses the built-in next() rather than the Python-2-only f.next() method so
  that the same code runs on Python 2.6+ and Python 3.

  Args:
    f: An iterator over the raw lines of the dependencies file.

  Returns:
    The first non-empty result of _RemoveComment(); never an empty string.

  Raises:
    StopIteration: When f is exhausted before such a line is found.
  """
  while True:
    line = _RemoveComment(next(f))
    if line: return line
def _ReadFiles(deps_file, item, library_name):
  """Reads the file-name lines that follow a library or group header.

  Every whitespace-separated name on each line starting with a space is
  validated and prefixed with library_name + "/". It is then recorded both in
  the module-level files set and in item["files"], which is created when the
  first such line is seen.

  Args:
    deps_file: Line iterator for the dependencies file.
    item: The library or group item being filled in.
    library_name: Name of the library that owns the listed files.

  Returns:
    The first line that does not start with a space.
    StopIteration from _ReadLine() propagates at the end of the input.
  """
  global files
  item_files = item.get("files")
  while True:
    # _ReadLine() only returns non-empty lines.
    line = _ReadLine(deps_file)
    if not line.startswith(" "):
      return line
    if item_files is None:
      item_files = set()
      item["files"] = item_files
    for base_name in line.split():
      _CheckFileName(base_name)
      qualified_name = library_name + "/" + base_name
      if qualified_name in files:
        sys.exit("Error:%d: file %s listed in multiple groups" % (_line_number, qualified_name))
      files.add(qualified_name)
      item_files.add(qualified_name)
def _IsLibrary(item): return item and item["type"] == "library"
def _IsLibraryGroup(item): return item and "library" in item
def _ReadDeps(deps_file, item, library_name):
  """Reads the dependency lines that follow a deps header.

  Every whitespace-separated name on each line starting with a space is
  validated as a group name and added to item["deps"], which is created when
  the first such line is seen. A name not yet present in the module-level
  items map is registered there as a new group and remembered in
  _groups_to_be_defined. It is tagged with library_name when one is given.

  Args:
    deps_file: Line iterator for the dependencies file.
    item: The library, group or system_symbols item being filled in.
    library_name: Library that newly mentioned groups belong to, or None.

  Returns:
    The first line that does not start with a space.
    StopIteration from _ReadLine() propagates at the end of the input.
  """
  global items, _line_number, _groups_to_be_defined
  item_deps = item.get("deps")
  while True:
    # _ReadLine() only returns non-empty lines.
    line = _ReadLine(deps_file)
    if not line.startswith(" "):
      return line
    if item_deps is None:
      item_deps = set()
      item["deps"] = item_deps
    for dep in line.split():
      _CheckGroupName(dep)
      dep_item = items.get(dep)
      if item["type"] == "system_symbols":
        # System symbols must not depend on ICU libraries or their groups.
        if _IsLibraryGroup(dep_item) or _IsLibrary(dep_item):
          sys.exit(("Error:%d: system_symbols depend on previously defined " +
                    "library or library group %s") % (_line_number, dep))
      if dep_item is None:
        # Add this dependency as a new group.
        new_group = {"type": "group"}
        if library_name:
          new_group["library"] = library_name
        items[dep] = new_group
        _groups_to_be_defined.add(dep)
      item_deps.add(dep)
def _AddSystemSymbol(item, symbol):
exports = item.get("system_symbols")
if exports == None: exports = item["system_symbols"] = set()
exports.add(symbol)
def _ReadSystemSymbols(deps_file, item):
  """Reads the symbol lines that follow a system-symbols header or group.

  A line containing a double quote must consist of exactly one symbol
  enclosed in double quotes, which allows spaces inside the symbol name.
  Any other line is split on whitespace into one or more symbols.

  Args:
    deps_file: Line iterator for the dependencies file.
    item: The item whose "system_symbols" set receives the symbols.

  Returns:
    The first line that does not start with a space.
    StopIteration from _ReadLine() propagates at the end of the input.
  """
  while True:
    # _ReadLine() only returns non-empty lines.
    line = _ReadLine(deps_file)
    if not line.startswith(" "):
      return line
    text = line.lstrip()
    if '"' not in text:
      # One or more space-separated symbols.
      for symbol in text.split():
        _AddSystemSymbol(item, symbol)
      continue
    # One double-quote-enclosed symbol on the line.
    symbol = text[1:-1]
    well_formed = text.startswith('"') and text.endswith('"') and '"' not in symbol
    if not well_formed:
      sys.exit("Error:%d: invalid quoted symbol name %s" % (_line_number, text))
    _AddSystemSymbol(item, symbol)
def Load():
  """Reads "dependencies.txt" and populates the module attributes.

  The file is opened relative to the current working directory. It is closed
  again before this function returns or exits, whether parsing succeeds or
  stops with an error.

  Recognized top-level lines are "library: NAME", "group: NAME", the deps
  header and "system_symbols:". Anything else is reported as a syntax error.
  All errors end the program via sys.exit() with a message. Errors found
  while parsing include the current line number.
  """
  global items, libraries, _line_number, _groups_to_be_defined
  deps_file = open("dependencies.txt")
  try:
    line = None
    current_type = None
    while True:
      # Each _Read...() helper returns the first line it did not consume,
      # so a new line is fetched only when none is pending.
      # next() works on Python 2.6+ and Python 3, unlike deps_file.next().
      while not line: line = _RemoveComment(next(deps_file))
      if line.startswith("library: "):
        current_type = "library"
        name = line[9:].lstrip()
        _CheckLibraryName(name)
        if name in items:
          sys.exit("Error:%d: library definition using duplicate name %s" % (_line_number, name))
        libraries.add(name)
        item = items[name] = {"type": "library"}
        line = _ReadFiles(deps_file, item, name)
      elif line.startswith("group: "):
        current_type = "group"
        name = line[7:].lstrip()
        _CheckGroupName(name)
        # A group must have been mentioned in some deps list before it is
        # defined, and may be defined only once.
        if name not in items:
          sys.exit("Error:%d: group %s defined before mentioned as a dependency" %
                   (_line_number, name))
        if name not in _groups_to_be_defined:
          sys.exit("Error:%d: group definition using duplicate name %s" % (_line_number, name))
        _groups_to_be_defined.remove(name)
        item = items[name]
        library_name = item.get("library")
        if library_name:
          line = _ReadFiles(deps_file, item, library_name)
        else:
          # A group without a library is a group of system symbols.
          line = _ReadSystemSymbols(deps_file, item)
      # NOTE(review): this literal must match the exact indentation of the
      # deps header lines in dependencies.txt -- confirm.
      elif line == " deps":
        if current_type == "library":
          line = _ReadDeps(deps_file, items[name], name)
        elif current_type == "group":
          item = items[name]
          line = _ReadDeps(deps_file, item, item.get("library"))
        elif current_type == "system_symbols":
          item = items[current_type]
          line = _ReadDeps(deps_file, item, None)
        else:
          sys.exit("Error:%d: deps before any library or group" % _line_number)
      elif line == "system_symbols:":
        current_type = "system_symbols"
        if current_type in items:
          sys.exit("Error:%d: duplicate entry for system_symbols" % _line_number)
        item = items[current_type] = {"type": current_type}
        line = _ReadSystemSymbols(deps_file, item)
      else:
        sys.exit("Syntax error:%d: %s" % (_line_number, line))
  except StopIteration:
    # End of file: raised by next() here or inside one of the helpers.
    pass
  finally:
    # Release the file handle on every path, including sys.exit().
    deps_file.close()
  if _groups_to_be_defined:
    sys.exit("Error: some groups mentioned in dependencies are undefined: %s" % _groups_to_be_defined)

View file

@ -0,0 +1,893 @@
# Copyright (C) 2011, International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: dependencies.txt
#
# created on: 2011may26
# created by: Markus W. Scherer
# Standard library symbols used by ICU --------------------------------------- #
system_symbols:
deps
# C
PIC system_debug errno_perror malloc_functions c_strings c_string_formatting
floating_point trigonometry
stdlib_qsort
pthread system_locale
stdio_input stdio_output file_io readlink_function dir_io mmap_functions dlfcn
# C++
cplusplus iostream
group: PIC
# Position-Independent Code (-fPIC) requires a Global Offset Table.
_GLOBAL_OFFSET_TABLE_
group: system_debug
__assert_fail __stack_chk_fail
group: errno_perror
perror # putil.cpp uprv_dl_open() calls perror("dlopen")
group: malloc_functions
free malloc realloc
group: c_strings
isspace
__ctype_b_loc # for <ctype.h>
# We must not use tolower and toupper because they are system-locale-sensitive (Turkish i).
strlen strchr strrchr strstr strcmp strncmp strcpy strncpy strcat strncat
memcmp memcpy memmove memset
# Additional symbols in an optimized build.
__strcpy_chk __strncpy_chk __strcat_chk __strncat_chk
__rawmemchr __memcpy_chk __memmove_chk
group: c_string_formatting
atoi atol strtod strtol strtoul
sprintf
# Additional symbols in an optimized build.
__sprintf_chk
group: floating_point
floor ceil modf fmod log pow sqrt
group: trigonometry
acos asin atan atan2 cos sin tan
# Additional symbols in an optimized build.
sincos
group: stdlib_qsort
qsort
group: pthread
pthread_mutex_init pthread_mutex_destroy pthread_mutex_lock pthread_mutex_unlock
group: system_locale
getenv
nl_langinfo setlocale
gettimeofday localtime_r tzname tzset __timezone
group: stdio_input
fopen fclose fgets fread fseek ftell rewind feof fileno
# Additional symbols in an optimized build.
__fgets_chk __fread_chk
group: stdio_output
fflush fwrite
group: file_io
open close stat
# Additional symbols in an optimized build.
__xstat
group: readlink_function
readlink # putil.cpp uprv_tzname() calls this in a hack to get the time zone name
group: dir_io
opendir closedir readdir # for a hack to get the time zone name
group: mmap_functions # for memory-mapped data loading
mmap munmap
group: dlfcn
dlopen dlclose dlsym # called by putil.o only for icuplug.o
group: cplusplus
__dynamic_cast
# The compiler generates references to the global operator delete
# even when no code actually uses it.
# ICU must not _use_ the global operator delete.
"operator delete(void*)"
# ICU also must not use the global operator new.
# "operator new[](unsigned long)"
# _Unwind_Resume is related to exceptions:
# "A call to this routine is inserted as the end of a landing pad that performs cleanup,
# but does not resume normal execution. It causes unwinding to proceed further."
# (Linux Standard Base Specification 1.3)
# Even though ICU does not actually use (nor handle) exceptions.
_Unwind_Resume
group: iostream
"std::basic_ios<char, std::char_traits<char> >::clear(std::_Ios_Iostate)"
"std::basic_ios<char, std::char_traits<char> >::eof() const"
"std::basic_ios<char, std::char_traits<char> >::fail() const"
"std::basic_ostream<char, std::char_traits<char> >& std::operator<< <std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*)"
std::istream::get()
std::istream::putback(char)
# Additional symbols in an optimized build.
"std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)"
# ICU common library --------------------------------------------------------- #
library: stubdata
stubdata.o # Exports icudt48_dat.
library: common
# All files in the common library are listed in its dependencies.
deps
# Libraries and groups that the common library depends on.
date_interval
breakiterator
uts46 filterednormalizer2 normalizer2 canonical_iterator
normlzr unormcmp unorm_it unorm
idna2003 stringprep
stringenumeration
unistr_core unistr_props unistr_case unistr_case_locale unistr_titlecase_brkiter unistr_cnv
uniset_core uniset_props uniset_closure usetiter uset uset_props
uiter
ucasemap ucasemap_titlecase_brkiter script_runs
uprops ubidi_props ucase uscript
ubidi ushape
resourcebundle service_registration resbund_cnv ures_cnv icudataver ucat
loclikely
conversion converter_selector ucnv_set ucnvdisp
messagepattern
icu_utility icu_utility_with_props
ustr_wcs
ucharstriebuilder ucharstrieiterator
bytestriebuilder bytestrieiterator
hashtable uhash uvector uvector32 uvector64 ulist
propsvec utrie2 utrie2_builder
sort
uinit utypes errorcode
icuplug
platform
group: date_interval # class DateInterval
dtintrv.o
deps
platform
group: breakiterator
# We could try to split off a breakiterator_builder group,
# but we still need uniset_props for code like in the ThaiBreakEngine constructor
# which does
# fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status)
brkiter.o brkeng.o ubrk.o
rbbi.o rbbinode.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o
rbbidata.o rbbirb.o
triedict.o dictbe.o
deps
resourcebundle service_registration
schriter utext uniset_core uniset_props
uhash ustack utrie
uvector32 # for triedict.o
group: unormcmp # unorm_compare()
unormcmp.o
deps
filterednormalizer2
uniset_props # for uniset_getUnicode32Instance()
ucase
group: unorm_it # UNormIterator
unorm_it.o
deps
unorm uiter
group: unorm # old normalization C API
unorm.o
deps
filterednormalizer2
uniset_props # for uniset_getUnicode32Instance()
uiter
group: normlzr # old Normalizer C++ class
normlzr.o
deps
filterednormalizer2
uniset_props # for uniset_getUnicode32Instance()
schriter
group: uts46
uts46.o
deps
normalizer2 punycode
uchar # for u_charType() (via U_GET_GC_MASK(c))
ubidi_props # for u_charDirection() & ubidi_getJoiningType()
unistr_core
stringpiece bytestream
group: filterednormalizer2
filterednormalizer2.o
deps
normalizer2
group: idna2003
uidna.o
deps
stringprep punycode
group: stringprep
usprep.o
deps
unorm # could change to use filterednormalizer2 directly for Unicode 3.2 normalization
normalizer2
ubidi_props
group: canonical_iterator
caniter.o
deps
normalizer2 usetiter
group: normalizer2
normalizer2.o
normalizer2impl.o
deps
uniset_core
unistr_core
utrie2_builder # for building CanonIterData & FCD
uvector # for building CanonIterData
uhash # for the instance cache
udata
group: punycode
punycode.o
deps
platform
group: uset_props
uset_props.o
deps
uniset_closure uniset_props uniset_core
group: uset
uset.o
deps
uniset_core
group: uniset_closure
uniset_closure.o
deps
uniset_core unistr_case_locale unistr_titlecase_brkiter
group: uniset_props
uniset_props.o ruleiter.o
deps
uniset_core uprops unistr_case
parsepos
resourcebundle
propname unames
group: parsepos
parsepos.o
deps
platform
group: usetiter # UnicodeSetIterator
usetiter.o
deps
uniset_core
group: uniset_core
unifilt.o unifunct.o
uniset.o bmpset.o unisetspan.o
deps
patternprops
unistr_core icu_utility
uvector
group: icu_utility_with_props
util_props.o
deps
icu_utility uchar ucase
group: icu_utility
util.o
deps
unistr_core patternprops
group: utext
utext.o
deps
unistr_core ucase
group: stringenumeration
ustrenum.o uenum.o
deps
unistr_core
group: schriter
schriter.o
# The UCharCharacterIterator implements virtual void getText(UnicodeString& result)
# so it depends on UnicodeString, therefore it makes little sense to split
# schriter and uchriter into separate groups.
uchriter.o
deps
chariter unistr_core
group: chariter
chariter.o
deps
platform
group: uiter
uiter.o
deps
platform
group: unistr_cnv
unistr_cnv.o
deps
conversion unistr_core
group: unistr_core
unistr.o
deps
ustrtrns appendable
group: uscript
uscript.o # uscript_getCode() accepts a locale ID and loads its script code data
deps
propname resourcebundle
group: uprops
uprops.o
deps
normalizer2
uchar
ubidi_props
unistr_case ustring_case # only for case folding
ucase
group: propname
propname.o
deps
bytestrie
group: unames
unames.o
deps
uchar udata
group: script_runs
usc_impl.o
deps
uchar
group: uchar
uchar.o
deps
utrie2
group: messagepattern # for MessageFormat and tools
messagepattern.o
deps
patternprops unistr_core
group: patternprops
patternprops.o
deps
PIC
group: ushape
ushape.o
deps
ubidi_props
group: ubidi
ubidi.o ubidiln.o ubidiwrt.o
deps
ubidi_props
uchar # for doWriteReverse() which uses IS_COMBINING(u_charType(c))
group: ubidi_props
ubidi_props.o
deps
utrie2
group: unistr_props
unistr_props.o
deps
unistr_core uchar
group: unistr_case_locale
unistr_case_locale.o
deps
unistr_case ustring_case_locale
group: unistr_case
unistr_case.o
deps
unistr_core
ustring_case
group: unistr_titlecase_brkiter
unistr_titlecase_brkiter.o
deps
ustr_titlecase_brkiter
group: ustr_titlecase_brkiter
ustr_titlecase_brkiter.o
deps
breakiterator
ustring_case_locale ucase
group: ucasemap_titlecase_brkiter
ucasemap_titlecase_brkiter.o
deps
ucasemap breakiterator utext
group: ucasemap
ucasemap.o
deps
ustring_case
resourcebundle # uloc_getName() etc.
group: ustring_case_locale
ustrcase_locale.o
deps
ustring_case
resourcebundle # for uloc_getDefault()
group: ustring_case
ustrcase.o
deps
ucase
group: ucase
ucase.o
deps
utrie2
group: uinit
uinit.o
deps
ucnv_io icuplug
group: converter_selector
ucnvsel.o
deps
conversion propsvec utrie2_builder uset ucnv_set
group: ucnvdisp # ucnv_getDisplayName()
ucnvdisp.o
deps
conversion resourcebundle
group: ucnv_set # ucnv_getUnicodeSet
ucnv_set.o
deps
uset
group: conversion
ustr_cnv.o
ucnv.o ucnv_cnv.o ucnv_bld.o ucnv_cb.o ucnv_err.o
ucnv_ct.o
ucnvmbcs.o ucnv_ext.o
ucnvhz.o ucnvisci.o ucnv_lmb.o ucnv2022.o
ucnvlat1.o ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o
ucnvbocu.o ucnvscsu.o
deps
ucnv_io
group: ucnv_io
ucnv_io.o
deps
sort stringenumeration udata
group: service_registration
serv.o servnotf.o servlkf.o servlk.o servls.o servrbf.o servslkf.o
locutil.o
deps
locale_display_names resourcebundle
hashtable uvector
group: ucat # message-catalog-like API
ucat.o
deps
resourcebundle
group: locale_display_names
locdispnames.o
deps
locresdata
group: icudataver # u_getDataVersion()
icudataver.o
deps
resourcebundle
group: loclikely
loclikely.o
deps
resourcebundle
group: locresdata
# This was intended to collect locale functions that load resource bundle data.
# See the resourcebundle group about what else loads data.
locresdata.o
deps
resourcebundle
group: resbund_cnv # paths are Unicode strings
resbund_cnv.o
deps
conversion resourcebundle ures_cnv
group: ures_cnv # ures_openU, path is a Unicode string
ures_cnv.o
deps
conversion resourcebundle
group: resourcebundle
resbund.o uresbund.o uresdata.o
locavailable.o
# uloc_tag.c converts between old ICU/LDML/CLDR locale IDs and newer BCP 47 IDs.
# It uses data from resource bundles for some of the mappings.
# We might want to generate .c files for that data, to #include rather than load,
# to minimize dependencies from this code.
# Then we could separate this higher-level locale ID code from the resource bundle code.
uloc.o uloc_tag.o
# Even basic locid.cpp, via Locale constructors and Locale::getDefault(),
# depends on canonicalization and data loading.
# We can probably only disentangle basic locale ID handling from resource bundle code
# by hardcoding all of the locale ID data.
locid.o locmap.o wintz.o
# Do we need class LocaleBased? http://bugs.icu-project.org/trac/ticket/8608
locbased.o
deps
udata ucol_swp
sort stringenumeration uhash
group: udata
udata.o ucmndata.o udatamem.o
umapfile.o
deps
uhash charstr stringpiece platform stubdata
file_io mmap_functions
group: ucharstriebuilder
ucharstriebuilder.o
deps
ucharstrie stringtriebuilder sort
unistr_core
group: ucharstrieiterator
ucharstrieiterator.o
deps
ucharstrie unistr_core uvector32
group: ucharstrie
ucharstrie.o
deps
platform
group: bytestriebuilder
bytestriebuilder.o
deps
bytestrie stringtriebuilder sort
charstr stringpiece
group: bytestrieiterator
bytestrieiterator.o
deps
bytestrie charstr uvector32
group: bytestrie
bytestrie.o
deps
platform
group: stringtriebuilder
stringtriebuilder.o
deps
uhash
group: propsvec
propsvec.o
deps
sort utrie2_builder
group: utrie2_builder
utrie2_builder.o
deps
platform
utrie2
utrie # for utrie2_fromUTrie()
ucol_swp # for utrie_swap()
group: utrie2
utrie2.o
deps
platform
group: utrie # Callers should use utrie2 instead.
utrie.o
deps
platform
group: hashtable # Maps UnicodeString to value.
uhash_us.o
deps
unistr_core
uhash
group: uhash
uhash.o
deps
platform
group: ustack
ustack.o
deps
uvector
group: uvector
uvector.o
deps
platform
sort # for UVector::sort()
group: uvector32
uvectr32.o
deps
platform
group: uvector64
uvectr64.o
deps
platform
group: ulist
ulist.o
deps
platform
group: sort
uarrsort.o
deps
platform
group: ustr_wcs
ustr_wcs.o
deps
ustrtrns # on platforms where wchar_t is UTF-32
# platform -- on other platforms
group: ustrtrns
ustrtrns.o
deps
platform
group: charstr
charstr.o
deps
unistr_core # for CharString::appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode)
platform
group: stringpiece
stringpiece.o
deps
PIC c_strings
group: bytestream
bytestream.o
deps
platform
group: appendable
appendable.o
deps
platform
group: icuplug
icuplug.o
deps
platform
group: ucol_swp
ucol_swp.o
deps
platform
group: errorcode # ErrorCode base class
errorcode.o
deps
utypes
PIC
group: utypes # u_errorName()
utypes.o
group: platform
# Files in the "platform" group.
cmemory.o uobject.o
cstring.o cwchar.o uinvchar.o
ustring.o # Other platform files really just need u_strlen
ustrfmt.o # uprv_itou
utf_impl.o
putil.o
ucln_cmn.o # for putil.o which calls ucln_common_registerCleanup
udataswp.o # for uinvchar.o; TODO: move uinvchar.o swapper functions to udataswp.o?
umath.o
mutex.o umutex.o
utrace.o
deps
# The "platform" group has no ICU dependencies.
PIC system_debug malloc_functions c_strings c_string_formatting
floating_point pthread system_locale
stdio_input readlink_function dir_io
errno_perror dlfcn # Move related code into icuplug.c?
cplusplus
# ICU i18n library ----------------------------------------------------------- #
library: i18n
deps
localedata charset_detector spoof_detection
alphabetic_index collation formatting formattable_cnv regex regex_cnv translit
universal_time_scale
uclean_i18n
group: localedata
ulocdata.o
deps
uniset_props resourcebundle
uset_props # TODO: change to using C++ UnicodeSet, remove this dependency
group: charset_detector
csdetect.o csmatch.o csr2022.o csrecog.o csrmbcs.o csrsbcs.o csrucode.o csrutf8.o inputext.o ucsdet.o
deps
conversion
uclean_i18n
group: spoof_detection
uspoof.o uspoof_build.o uspoof_conf.o uspoof_impl.o uspoof_wsconf.o
deps
uniset_props regex unorm uscript
group: alphabetic_index
alphaindex.o
deps
collation localedata
uclean_i18n
group: collation
bocsu.o coleitr.o coll.o colldata.o sortkey.o tblcoll.o ucol.o
ucol_bld.o ucol_cnt.o ucol_elm.o ucol_res.o ucol_sit.o ucol_tok.o ucol_wgt.o ucoleitr.o
bms.o bmsearch.o search.o stsearch.o usearch.o
deps
common # TODO: Could be narrower.
uclean_i18n
group: formatting
# TODO: Try to subdivide this ball of wax.
# locale_display_names2
locdspnm.o
# currency
ucurr.o
# currencyformat
curramt.o currfmt.o currpinf.o currunit.o
# decimalformat
dcfmtsym.o decfmtst.o decimfmt.o
numfmt.o numsys.o unum.o winnmfmt.o
# rbnf
nfrs.o nfrule.o nfsubs.o rbnf.o
# measureformat
measfmt.o
# dateformat
astro.o buddhcal.o calendar.o cecal.o chnsecal.o coptccal.o ethpccal.o
gregocal.o gregoimp.o hebrwcal.o indiancal.o islamcal.o japancal.o persncal.o taiwncal.o
ucal.o
basictz.o olsontz.o rbtz.o simpletz.o timezone.o tzrule.o tztrans.o
vtzone.o vzone.o wintzimpl.o zonemeta.o zrule.o ztrans.o
tzfmt.o tzgnames.o tznames.o tznames_impl.o
datefmt.o dtfmtsym.o dtitvfmt.o dtitvinf.o dtptngen.o dtrule.o reldtfmt.o
smpdtfmt.o smpdtfst.o udateintervalformat.o udatpg.o windtfmt.o
udat.o
tmunit.o tmutamt.o tmutfmt.o
# messageformat
choicfmt.o msgfmt.o plurfmt.o selfmt.o umsg.o
deps
digitlist formattable format
pluralrules
collation # for rbnf
common
floating_point # sqrt() for astro.o
trigonometry # for astro.o
stdlib_qsort # for ucurr.o (which does not use ICU's uarrsort.o)
uclean_i18n
group: digitlist
digitlst.o decContext.o decNumber.o
deps
charstr stringpiece unistr_core
group: formattable
fmtable.o
measure.o
deps
unistr_core digitlist stringpiece charstr
group: formattable_cnv
fmtable_cnv.o
deps
formattable unistr_cnv conversion
group: format
format.o fphdlimp.o fpositer.o
deps
resourcebundle parsepos unistr_core uvector32
group: pluralrules
plurrule.o upluralrules.o
deps
patternprops resourcebundle uvector
unistr_case_locale
group: regex_cnv
uregexc.o
deps
regex unistr_cnv
group: regex
regexcmp.o regexst.o regextxt.o rematch.o repattrn.o uregex.o
deps
uniset_closure utext uvector32 uvector64 ustack
breakiterator
unistr_core
uinit # TODO: Really needed?
uclean_i18n
group: translit
anytrans.o brktrans.o casetrn.o cpdtrans.o name2uni.o uni2name.o nortrans.o remtrans.o titletrn.o tolowtrn.o toupptrn.o
esctrn.o unesctrn.o nultrans.o
funcrepl.o quant.o rbt.o rbt_data.o rbt_pars.o rbt_rule.o rbt_set.o strmatch.o strrepl.o translit.o transreg.o tridpars.o utrans.o
deps
common
formatting # for Transliterator::getDisplayName()
uclean_i18n
group: universal_time_scale
utmscale.o
group: uclean_i18n
ucln_in.o
deps
platform
# ICU io library ------------------------------------------------------------- #
library: io
deps
ustdio ustream uclean_io
group: ustdio
locbund.o sprintf.o sscanf.o ufile.o ufmt_cmn.o uprintf.o uprntf_p.o uscanf.o uscanf_p.o ustdio.o
deps
formatting conversion translit
uclean_io
stdio_output
group: ustream
ustream.o
deps
unistr_cnv
uchar # for u_isWhitespace()
iostream
group: uclean_io
ucln_io.o
deps
platform

View file

@ -0,0 +1,167 @@
#! /usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2011, International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: depstest.py
#
# created on: 2011may24
"""ICU dependency tester.
This probably works only on Linux.
The exit code is 0 if everything is fine, 1 for errors, 2 for only warnings.
Sample invocation:
~/svn.icu/trunk/src/source/test/depstest$ ./depstest.py ~/svn.icu/trunk/dbg
"""
__author__ = "Markus W. Scherer"
import glob
import os.path
import subprocess
import sys
import dependencies
# Symbol names that _ReadObjFile() skips (C++ runtime/ABI names);
# main() prints them as an informational list.
_ignored_symbols = set()
# Maps "library/file.o" to {"imports": set of symbols, "exports": set of symbols}.
_obj_files = {}
# Maps a symbol to the "library/file.o" that exports it, or, for system
# symbols, to the name of the dependencies item that declares it.
_symbols_to_files = {}
# Exit code of the script: 0 if everything is fine, 1 for errors,
# 2 for only warnings (see the module docstring).
_return_value = 0
def _ReadObjFile(root_path, library_name, obj_name):
  """Reads the external symbols of one .o file via `nm` and records them.

  Args:
    root_path: Root directory that contains one subdirectory per library.
    library_name: Name of the library subdirectory below root_path.
    obj_name: Base name of the .o file inside that subdirectory.

  Side effects:
    Adds a "library_name/obj_name" entry with its "imports" and "exports"
    symbol sets to _obj_files, maps every exported symbol to that entry in
    _symbols_to_files, and collects skipped C++ runtime symbols in
    _ignored_symbols. A duplicate .o file is reported as a warning, which
    is recorded in _return_value.
  """
  # _return_value is assigned below, so it must be declared global here;
  # without the declaration the warning status only set a local variable.
  global _ignored_symbols, _obj_files, _symbols_to_files, _return_value
  lib_obj_name = library_name + "/" + obj_name
  if lib_obj_name in _obj_files:
    print("Warning: duplicate .o file " + lib_obj_name)
    # Record the warning, but never downgrade an earlier error (1).
    if not _return_value:
      _return_value = 2
    return
  path = os.path.join(root_path, library_name, obj_name)
  nm_result = subprocess.Popen(["nm", "--demangle", "--format=sysv",
                                "--extern-only", "--no-sort", path],
                               stdout=subprocess.PIPE).communicate()[0]
  obj_imports = set()
  obj_exports = set()
  for line in nm_result.splitlines():
    fields = line.split("|")
    # Skip lines that do not have enough "|"-separated fields to hold
    # both the symbol name (field 0) and the symbol type (field 2).
    if len(fields) < 3:
      continue
    name = fields[0].strip()
    # Ignore symbols like '__cxa_pure_virtual',
    # 'vtable for __cxxabiv1::__si_class_type_info' or
    # 'DW.ref.__gxx_personality_v0'.
    if name.startswith("__cxa") or "__cxxabi" in name or "__gxx" in name:
      _ignored_symbols.add(name)
      continue
    symbol_type = fields[2].strip()
    if symbol_type == "U":
      # "U" symbols are the ones this file imports from elsewhere.
      obj_imports.add(name)
    else:
      # TODO: Investigate weak symbols (V, W) with or without values.
      obj_exports.add(name)
      _symbols_to_files[name] = lib_obj_name
  _obj_files[lib_obj_name] = {"imports": obj_imports, "exports": obj_exports}
def _ReadLibrary(root_path, library_name):
  """Reads every *.o file found in root_path/library_name.

  Args:
    root_path: Root directory that contains one subdirectory per library.
    library_name: Name of the library subdirectory to scan.
  """
  pattern = os.path.join(root_path, library_name, "*.o")
  for obj_path in glob.glob(pattern):
    obj_name = os.path.basename(obj_path)
    _ReadObjFile(root_path, library_name, obj_name)
def _Resolve(name, parents):
  """Recursively computes and caches an item's exports and system symbols.

  Args:
    name: Key of the item (library or group) in dependencies.items.
    parents: List of the item names currently being resolved further up
      the recursion; used to detect circular dependencies.

  Returns:
    The item dictionary from dependencies.items, with its "exports" and
    "system_symbols" sets filled in, including those of its dependencies.

  Exits the program if the item is found among its own parents.
  Sets _return_value to 1 if one of the item's files imports a symbol that
  is provided neither by the item itself nor by its dependencies.
  """
  global _ignored_symbols, _obj_files, _symbols_to_files, _return_value
  item = dependencies.items[name]
  item_type = item["type"]
  if name in parents:
    sys.exit("Error: %s %s has a circular dependency on itself: %s" %
             (item_type, name, parents))
  # Check if already cached.
  if item.get("exports") is not None:
    return item
  # Calculate recursively.
  parents.append(name)
  imports = set()
  exports = set()
  system_symbols = item.get("system_symbols")
  if system_symbols is None:
    system_symbols = item["system_symbols"] = set()
  files = item.get("files")
  if files:
    for file_name in files:
      obj_file = _obj_files[file_name]
      imports |= obj_file["imports"]
      exports |= obj_file["exports"]
  # Imports satisfied by the item's own files, and ignored symbols,
  # do not need to be provided by a dependency.
  imports -= exports | _ignored_symbols
  deps = item.get("deps")
  if deps:
    for dep in deps:
      dep_item = _Resolve(dep, parents)
      # Detect whether this item needs to depend on dep,
      # except when this item has no files, that is, when it is just
      # a deliberate umbrella group or library.
      dep_exports = dep_item["exports"]
      dep_system_symbols = dep_item["system_symbols"]
      if (files and imports.isdisjoint(dep_exports) and
          imports.isdisjoint(dep_system_symbols)):
        print("Info: %s %s does not need to depend on %s\n" % (item_type, name, dep))
      # We always include the dependency's exports, even if we do not need them
      # to satisfy local imports.
      exports |= dep_exports
      system_symbols |= dep_system_symbols
  item["exports"] = exports
  item["system_symbols"] = system_symbols
  # Whatever is still imported now is not covered by any declared dependency.
  imports -= exports | system_symbols
  # Sort so that the error report is reproducible from run to run
  # rather than following set iteration order.
  for symbol in sorted(imports):
    for file_name in files:
      if symbol in _obj_files[file_name]["imports"]:
        sys.stderr.write("Error: %s %s file %s imports %s but %s does not depend on %s\n" %
                         (item_type, name, file_name, symbol, name, _symbols_to_files.get(symbol)))
    _return_value = 1
  del parents[-1]
  return item
def Process(root_path):
  """Loads dependencies.txt, reads the libraries' .o files, and processes them.

  Modifies dependencies.items: Recursively builds each item's system_symbols and exports.

  Args:
    root_path: Root directory that contains the built libraries' .o files,
      one subdirectory per library.

  Sets _return_value to 1 if the set of .o files found on disk differs from
  the set of files listed in dependencies.txt; in that case the dependencies
  are not resolved.
  """
  global _ignored_symbols, _obj_files, _symbols_to_files, _return_value
  dependencies.Load()
  # Remember which item declares each system symbol, so that error messages
  # can name the item that would provide a missing symbol.
  for name, item in dependencies.items.items():
    system_symbols = item.get("system_symbols")
    if system_symbols:
      for symbol in system_symbols:
        _symbols_to_files[symbol] = name
  for library_name in dependencies.libraries:
    _ReadLibrary(root_path, library_name)
  o_files_set = set(_obj_files)
  files_missing_from_deps = o_files_set - dependencies.files
  files_missing_from_build = dependencies.files - o_files_set
  if files_missing_from_deps:
    sys.stderr.write("Error: files missing from dependencies.txt:\n%s\n" %
                     sorted(files_missing_from_deps))
    _return_value = 1
  if files_missing_from_build:
    sys.stderr.write("Error: files in dependencies.txt but not built:\n%s\n" %
                     sorted(files_missing_from_build))
    _return_value = 1
  # Only errors (1) make resolving pointless; a warning-only status (2)
  # must not suppress the actual dependency check.
  if _return_value != 1:
    for library_name in dependencies.libraries:
      _Resolve(library_name, [])
def main():
  """Command-line entry point.

  Expects the root path of the built ICU libraries as the first argument,
  runs the dependency check on it and prints a summary.

  Returns:
    The value of _return_value after processing, for use as the exit code.
  """
  if len(sys.argv) < 2:
    sys.exit("Command line error: "
             "need one argument with the root path to the built ICU libraries/*.o files.")
  root_path = sys.argv[1]
  Process(root_path)
  if _ignored_symbols:
    print("Info: ignored symbols:\n%s" % sorted(_ignored_symbols))
  if _return_value == 0:
    print("OK: Specified and actual dependencies match.")
  return _return_value


# When run as a script, main()'s return value becomes the process exit code.
if __name__ == "__main__":
  sys.exit(main())

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 2002-2010, International Business Machines Corporation and
* Copyright (c) 2002-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************
*
@ -130,7 +130,7 @@ void CanonicalIteratorTest::TestBasic() {
// NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted!
Hashtable *permutations = new Hashtable(FALSE, status);
permutations->setValueDeleter(uhash_deleteUnicodeString);
permutations->setValueDeleter(uprv_deleteUObject);
UnicodeString toPermute("ABC");
CanonicalIterator::permute(toPermute, FALSE, permutations, status);
@ -144,7 +144,7 @@ void CanonicalIteratorTest::TestBasic() {
// try samples
logln("testing samples");
Hashtable *set = new Hashtable(FALSE, status);
set->setValueDeleter(uhash_deleteUnicodeString);
set->setValueDeleter(uprv_deleteUObject);
int32_t i = 0;
CanonicalIterator it("", status);
if(U_SUCCESS(status)) {

View file

@ -1,6 +1,6 @@
/**
*******************************************************************************
* Copyright (C) 2001-2010, International Business Machines Corporation and
* Copyright (C) 2001-2011, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
@ -458,7 +458,7 @@ ICUServiceTest::testAPI_One()
// should not be able to locate invisible services
{
UErrorCode status = U_ZERO_ERROR;
UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, status);
UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, status);
service.getVisibleIDs(ids, status);
UnicodeString target = "en_US_BAR";
confirmBoolean("18) find invisible", !ids.contains(&target));
@ -551,7 +551,7 @@ class TestMultipleKeyStringFactory : public ICUServiceFactory {
public:
TestMultipleKeyStringFactory(const UnicodeString ids[], int32_t count, const UnicodeString& factoryID)
: _status(U_ZERO_ERROR)
, _ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, count, _status)
, _ids(uprv_deleteUObject, uhash_compareUnicodeString, count, _status)
, _factoryID(factoryID + ": ")
{
for (int i = 0; i < count; ++i) {
@ -680,7 +680,7 @@ ICUServiceTest::testAPI_Two()
// iterate over the visual ids returned by the multiple factory
{
UErrorCode status = U_ZERO_ERROR;
UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status);
UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status);
service.getVisibleIDs(ids, status);
for (int i = 0; i < ids.size(); ++i) {
const UnicodeString* id = (const UnicodeString*)ids[i];
@ -801,7 +801,7 @@ ICUServiceTest::testAPI_Two()
{
UErrorCode status = U_ZERO_ERROR;
UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status);
UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status);
service.getVisibleIDs(ids, status);
for (int i = 0; i < ids.size(); ++i) {
const UnicodeString* id = (const UnicodeString*)ids[i];
@ -897,7 +897,7 @@ ICUServiceTest::testRBF()
// list all of the resources
{
UErrorCode status = U_ZERO_ERROR;
UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status);
UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status);
service.getVisibleIDs(ids, status);
logln("all visible ids:");
for (int i = 0; i < ids.size(); ++i) {
@ -1164,7 +1164,7 @@ void ICUServiceTest::testLocale() {
{
UErrorCode status = U_ZERO_ERROR;
UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status);
UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status);
service.getVisibleIDs(ids, status);
logln("all visible ids:");
for (int i = 0; i < ids.size(); ++i) {
@ -1176,7 +1176,7 @@ void ICUServiceTest::testLocale() {
Locale::setDefault(loc, status);
{
UErrorCode status = U_ZERO_ERROR;
UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status);
UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status);
service.getVisibleIDs(ids, status);
logln("all visible ids:");
for (int i = 0; i < ids.size(); ++i) {
@ -1368,7 +1368,7 @@ void ICUServiceTest::testCoverage()
}
}
UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, status);
UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, status);
// yuck, this is awkward to use. All because we pass null in an overload.
// TODO: change this.
UnicodeString str("Greet");

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2003-2010, International Business Machines Corporation and *
* Copyright (C) 2003-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -353,7 +353,7 @@ void CollationServiceTest::TestRegisterFactory(void)
errln("memory allocation error");
return;
}
fuFUNames->setValueDeleter(uhash_deleteUnicodeString);
fuFUNames->setValueDeleter(uprv_deleteUObject);
fuFUNames->put(fu_FU.getName(), new UnicodeString("ze leetle bunny Fu-Fu"), status);
fuFUNames->put(fu_FU_FOO.getName(), new UnicodeString("zee leetel bunny Foo-Foo"), status);

View file

@ -1573,7 +1573,7 @@ public:
Hashtable contents;
TokenSymbolTable(UErrorCode& ec) : contents(FALSE, ec) {
contents.setValueDeleter(uhash_deleteUnicodeString);
contents.setValueDeleter(uprv_deleteUObject);
}
~TokenSymbolTable() {}

View file

@ -886,7 +886,7 @@ struct SResource* array_open(struct SRBRoot *bundle, const char *tag, const stru
static int32_t U_CALLCONV
string_hash(const UHashTok key) {
const struct SResource *res = (struct SResource *)key.pointer;
return uhash_hashUCharsN(res->u.fString.fChars, res->u.fString.fLength);
return ustr_hashUCharsN(res->u.fString.fChars, res->u.fString.fLength);
}
static UBool U_CALLCONV