ICU-2329 replace APIs using char * for transliterator IDs with APIs using UChar * - to allow for more than invariant characters

X-SVN-Rev: 12586
This commit is contained in:
Markus Scherer 2003-07-02 17:57:02 +00:00
parent 84bd0babfa
commit 9ec6b45611
3 changed files with 384 additions and 159 deletions

View file

@ -17,6 +17,7 @@
#include "unicode/urep.h"
#include "unicode/parseerr.h"
#include "unicode/uenum.h"
/********************************************************************
* General Notes
@ -37,6 +38,18 @@
* services are available to C code through this header. In order to
* access more complex transliteration services, refer to the C++
* headers and documentation.
*
* There are two sets of functions for working with transliterator IDs:
*
* An old, deprecated set uses char * IDs. It works only for pure
* identifiers, which is what these APIs were designed for,
* for example "Cyrillic-Latin".
* It does not work when the ID contains filters ("[:Script=Cyrl:]")
* or even a complete set of rules, because then the ID string contains more
* than just "invariant" characters (see utypes.h).
*
* A new set of functions replaces the old ones and uses UChar * IDs,
* paralleling the UnicodeString IDs in the C++ API. (New in ICU 2.8.)
*/
/********************************************************************
@ -155,29 +168,30 @@ typedef struct UTransPosition {
* Any non-NULL result from this function should later be closed with
* utrans_close().
*
* @param id a valid ID, as returned by utrans_getAvailableID()
* @param id a valid transliterator ID
* @param idLength the length of the ID string, or -1 if NUL-terminated
* @param dir the desired direction
* @param rules the transliterator rules. See the C++ header rbt.h
* for rules syntax. If NULL then a system transliterator matching
* the ID is returned.
* @param rules the transliterator rules. See the C++ header rbt.h for
* rules syntax. If NULL then a system transliterator matching
* the ID is returned.
* @param rulesLength the length of the rules, or -1 if the rules
* are zero-terminated.
* @param dir the desired direction
* @param parseError a pointer to a UParseError struct to receive the
* details of any parsing errors. This parameter may be NULL if no
* parsing error details are desired.
* @param status a pointer to the UErrorCode
* are NUL-terminated.
* @param parseError a pointer to a UParseError struct to receive the details
* of any parsing errors. This parameter may be NULL if no
* parsing error details are desired.
* @param pErrorCode a pointer to the UErrorCode
* @return a transliterator pointer that may be passed to other
* utrans_xxx() functions, or NULL if the open call fails.
* @stable ICU 2.0
* utrans_xxx() functions, or NULL if the open call fails.
* @draft ICU 2.8
*/
U_CAPI UTransliterator* U_EXPORT2
utrans_open(const char* id,
UTransDirection dir,
const UChar* rules, /* may be Null */
int32_t rulesLength, /* -1 if null-terminated */
UParseError* parseError, /* may be Null */
UErrorCode* status);
U_CAPI UTransliterator* U_EXPORT2
utrans_openU(const UChar *id,
int32_t idLength,
UTransDirection dir,
const UChar *rules,
int32_t rulesLength,
UParseError *parseError,
UErrorCode *pErrorCode);
/**
* Open an inverse of an existing transliterator. For this to work,
@ -223,22 +237,20 @@ utrans_close(UTransliterator* trans);
/**
* Return the programmatic identifier for this transliterator.
* If this identifier is passed to utrans_open(), it will open
* If this identifier is passed to utrans_openU(), it will open
* a transliterator equivalent to this one, if the ID has been
* registered.
*
* @param trans the transliterator to return the ID of.
* @param buf the buffer in which to receive the ID. This may be
* NULL, in which case no characters are copied.
* @param bufCapacity the capacity of the buffer. Ignored if buf is
* NULL.
* @return the actual length of the ID, not including
* zero-termination. This may be greater than bufCapacity.
* @stable ICU 2.0
* @param resultLength pointer to an output variable receiving the length
* of the ID string; can be NULL
* @return the NUL-terminated ID string
*
* @draft ICU 2.8
*/
U_CAPI int32_t U_EXPORT2
utrans_getID(const UTransliterator* trans,
char* buf,
int32_t bufCapacity);
U_CAPI const UChar * U_EXPORT2
utrans_getUnicodeID(const UTransliterator *trans,
int32_t *resultLength);
/**
* Register an open transliterator with the system. When
@ -261,13 +273,13 @@ utrans_register(UTransliterator* adoptedTrans,
/**
* Unregister a transliterator from the system. After this call the
* system will no longer recognize the given ID when passed to
* utrans_open(). If the id is invalid then nothing is done.
* utrans_open(). If the ID is invalid then nothing is done.
*
* @param id a zero-terminated ID
* @stable ICU 2.0
* @param id an ID; must be NUL-terminated if idLength is -1
* @param idLength the length of the ID string, or -1 if NUL-terminated
* @draft ICU 2.8
*/
U_CAPI void U_EXPORT2
utrans_unregister(const char* id);
U_CAPI void U_EXPORT2
utrans_unregisterID(const UChar* id, int32_t idLength);
/**
* Set the filter used by a transliterator. A filter can be used to
@ -295,6 +307,8 @@ utrans_setFilter(UTransliterator* trans,
/**
* Return the number of system transliterators.
* It is recommended to use utrans_openIDs() instead.
*
* @return the number of system transliterators.
* @stable ICU 2.0
*/
@ -302,26 +316,16 @@ U_CAPI int32_t U_EXPORT2
utrans_countAvailableIDs(void);
/**
* Return the ID of the index-th system transliterator. The result
* is placed in the given buffer. If the given buffer is too small,
* the initial substring is copied to buf. The result in buf is
* always zero-terminated.
* Return a UEnumeration for the available transliterators.
*
* @param index the number of the transliterator to return. Must
* satisfy 0 <= index < utrans_countAvailableIDs(). If index is out
* of range then it is treated as if it were 0.
* @param buf the buffer in which to receive the ID. This may be
* NULL, in which case no characters are copied.
* @param bufCapacity the capacity of the buffer. Ignored if buf is
* NULL.
* @return the actual length of the index-th ID, not including
* zero-termination. This may be greater than bufCapacity.
* @stable ICU 2.0
* @param pErrorCode Pointer to the UErrorCode in/out parameter.
* @return UEnumeration for the available transliterators.
* Close with uenum_close().
*
* @draft ICU 2.8
*/
U_CAPI int32_t U_EXPORT2
utrans_getAvailableID(int32_t index,
char* buf,
int32_t bufCapacity);
U_CAPI UEnumeration * U_EXPORT2
utrans_openIDs(UErrorCode *pErrorCode);
/********************************************************************
* Transliteration API
@ -482,32 +486,95 @@ utrans_transIncrementalUChars(const UTransliterator* trans,
UTransPosition* pos,
UErrorCode* status);
/* deprecated API ----------------------------------------------------------- */
/* see utrans.h documentation for why these functions are deprecated */
/********************* Obsolete API ************************************/
/**
* TODO: Remove after Aug 2002
* Deprecated, use utrans_openU() instead.
* Open a custom transliterator, given a custom rules string
* OR
* a system transliterator, given its ID.
* Any non-NULL result from this function should later be closed with
* utrans_close().
*
* @param id a valid ID, as returned by utrans_getAvailableID()
* @param dir the desired direction
* @param rules the transliterator rules. See the C++ header rbt.h
* for rules syntax. If NULL then a system transliterator matching
* the ID is returned.
* @param rulesLength the length of the rules, or -1 if the rules
* are zero-terminated.
* @param parseError a pointer to a UParseError struct to receive the
* details of any parsing errors. This parameter may be NULL if no
* parsing error details are desired.
* @param status a pointer to the UErrorCode
* @return a transliterator pointer that may be passed to other
* utrans_xxx() functions, or NULL if the open call fails.
* @deprecated ICU 2.8 Use utrans_openU() instead, see utrans.h
*/
U_CAPI UTransliterator* U_EXPORT2
utrans_open(const char* id,
UTransDirection dir,
const UChar* rules, /* may be Null */
int32_t rulesLength, /* -1 if null-terminated */
UParseError* parseError, /* may be Null */
UErrorCode* status);
#ifdef U_USE_DEPRECATED_FORMAT_API
/**
* Deprecated, use utrans_getUnicodeID() instead.
* Return the programmatic identifier for this transliterator.
* If this identifier is passed to utrans_open(), it will open
* a transliterator equivalent to this one, if the ID has been
* registered.
* @param trans the transliterator to return the ID of.
* @param buf the buffer in which to receive the ID. This may be
* NULL, in which case no characters are copied.
* @param bufCapacity the capacity of the buffer. Ignored if buf is
* NULL.
* @return the actual length of the ID, not including
* zero-termination. This may be greater than bufCapacity.
* @deprecated ICU 2.8 Use utrans_getUnicodeID() instead, see utrans.h
*/
U_CAPI int32_t U_EXPORT2
utrans_getID(const UTransliterator* trans,
char* buf,
int32_t bufCapacity);
#if ((U_ICU_VERSION_MAJOR_NUM != 2) || (U_ICU_VERSION_MINOR_NUM != 2))
# error "ICU version has changed. Please redefine the macros under U_USE_DEPRECATED_FORMAT_API pre-processor definition"
#else
U_CAPI UTransliterator* U_EXPORT2
utrans_openRules(const char* id,
const UChar* rules,
int32_t rulesLength, /* -1 if null-terminated */
UTransDirection dir,
UParseError* parseErr, /* may be NULL */
UErrorCode* status){
return utrans_open(id,dir,rules,rulesLength,parseErr,status);
}
/**
* Deprecated, use utrans_unregisterID() instead.
* Unregister a transliterator from the system. After this call the
* system will no longer recognize the given ID when passed to
* utrans_open(). If the id is invalid then nothing is done.
*
* @param id a zero-terminated ID
* @deprecated ICU 2.8 Use utrans_unregisterID() instead, see utrans.h
*/
U_CAPI void U_EXPORT2
utrans_unregister(const char* id);
# define utrans_open_2_2(id,dir,status) utrans_open(id,dir,NULL,0,NULL,status)
#endif
#endif
/********************* End **********************************************/
/**
* Deprecated, use utrans_openIDs() instead.
* Return the ID of the index-th system transliterator. The result
* is placed in the given buffer. If the given buffer is too small,
* the initial substring is copied to buf. The result in buf is
* always zero-terminated.
*
* @param index the number of the transliterator to return. Must
* satisfy 0 <= index < utrans_countAvailableIDs(). If index is out
* of range then it is treated as if it were 0.
* @param buf the buffer in which to receive the ID. This may be
* NULL, in which case no characters are copied.
* @param bufCapacity the capacity of the buffer. Ignored if buf is
* NULL.
* @return the actual length of the index-th ID, not including
* zero-termination. This may be greater than bufCapacity.
* @deprecated ICU 2.8 Use utrans_openIDs() instead, see utrans.h
*/
U_CAPI int32_t U_EXPORT2
utrans_getAvailableID(int32_t index,
char* buf,
int32_t bufCapacity);
#endif /* #if !UCONFIG_NO_TRANSLITERATION */

View file

@ -19,6 +19,8 @@
#include "unicode/unifilt.h"
#include "unicode/uniset.h"
#include "unicode/ustring.h"
#include "unicode/uenum.h"
#include "uenumimp.h"
#include "cpputils.h"
#include "rbt.h"
@ -132,84 +134,18 @@ U_NAMESPACE_END
* General API
********************************************************************/
U_NAMESPACE_USE
#if 0
U_CAPI UTransliterator*
utrans_open(const char* id,
UTransDirection dir,
UParseError* parseError,
UErrorCode* status) {
utrans_ENTRY(status) NULL;
if (id == NULL) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
UnicodeString ID(id, ""); // use invariant converter
Transliterator *trans = NULL;
trans = Transliterator::createInstance(ID, dir, *parseError, *status);
if (trans == NULL) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
}
return (UTransliterator*) trans;
}
U_CAPI UTransliterator*
utrans_openRules(const char* id,
const UChar* rules,
int32_t rulesLength, /* -1 if null-terminated */
UTransDirection dir,
UParseError* parseErr, /* may be NULL */
UErrorCode* status) {
utrans_ENTRY(status) NULL;
if (id == NULL || rules == NULL) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
UnicodeString ID(id, ""); // use invariant converter
UnicodeString ruleStr(rulesLength < 0,
rules,
rulesLength); // r-o alias
RuleBasedTransliterator *trans = NULL;
// Use if() to avoid construction of ParseError object on stack
// unless it is called for by user.
if (parseErr != NULL) {
trans = new RuleBasedTransliterator(ID, ruleStr, dir,
NULL, *parseErr, *status);
} else {
trans = new RuleBasedTransliterator(ID, ruleStr, dir,
NULL, *status);
}
if (trans == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
} else if (U_FAILURE(*status)) {
delete trans;
trans = NULL;
}
return (UTransliterator*) trans;
}
#endif
U_CAPI UTransliterator* U_EXPORT2
utrans_open(const char* id,
UTransDirection dir,
const UChar* rules, /* may be Null */
int32_t rulesLength, /* -1 if null-terminated */
UParseError* parseError, /* may be Null */
UErrorCode* status) {
utrans_ENTRY(status) NULL;
utrans_openU(const UChar *id,
int32_t idLength,
UTransDirection dir,
const UChar *rules,
int32_t rulesLength,
UParseError *parseError,
UErrorCode *status) {
if(status==NULL || U_FAILURE(*status)) {
return NULL;
}
if (id == NULL) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
@ -220,7 +156,7 @@ utrans_open(const char* id,
parseError = &temp;
}
UnicodeString ID(id, ""); // use invariant converter
UnicodeString ID(idLength<0, id, idLength); // r-o alias
if(rules==NULL){
@ -250,6 +186,19 @@ utrans_open(const char* id,
}
}
/**
 * Deprecated char * flavor of utrans_openU().
 * Converts the invariant-character ID to UChars and delegates all further
 * argument checking and the actual work to utrans_openU().
 *
 * @param id          the transliterator ID, invariant characters only
 * @param dir         the desired direction
 * @param rules       the transliterator rules; may be NULL
 * @param rulesLength the length of the rules, or -1 if NUL-terminated
 * @param parseError  receives parse error details; may be NULL
 * @param status      in/out error code, passed through to utrans_openU()
 * @return the result of utrans_openU()
 */
U_CAPI UTransliterator* U_EXPORT2
utrans_open(const char* id,
            UTransDirection dir,
            const UChar* rules,         /* may be Null */
            int32_t rulesLength,        /* -1 if null-terminated */
            UParseError* parseError,    /* may be Null */
            UErrorCode* status) {
    if(id==NULL) {
        // Pass the NULL through unchanged so that utrans_openU() rejects it
        // with its own id==NULL check, rather than opening with whatever a
        // UnicodeString built from a NULL char * happens to contain.
        return utrans_openU(NULL, 0, dir,
                            rules, rulesLength,
                            parseError, status);
    }

    UnicodeString ID(id, ""); // use invariant converter
    return utrans_openU(ID.getBuffer(), ID.length(), dir,
                        rules, rulesLength,
                        parseError, status);
}
U_CAPI UTransliterator* U_EXPORT2
utrans_openInverse(const UTransliterator* trans,
UErrorCode* status) {
@ -285,6 +234,17 @@ utrans_close(UTransliterator* trans) {
delete (Transliterator*) trans;
}
/**
 * Return the transliterator's ID as a UChar string.
 *
 * @param trans        the transliterator whose ID is wanted
 * @param resultLength if not NULL, receives the length of the ID
 * @return pointer to the ID's internal buffer
 */
U_CAPI const UChar * U_EXPORT2
utrans_getUnicodeID(const UTransliterator *trans,
                    int32_t *resultLength) {
    Transliterator *t=(Transliterator*) trans;
    const UnicodeString &idString=t->getID();

    if(resultLength!=NULL) {
        *resultLength=idString.length();
    }

    // Transliterator keeps its ID NUL-terminated,
    // so the internal buffer can be handed out directly
    return idString.getBuffer();
}
U_CAPI int32_t U_EXPORT2
utrans_getID(const UTransliterator* trans,
char* buf,
@ -300,6 +260,12 @@ utrans_register(UTransliterator* adoptedTrans,
Transliterator::registerInstance((Transliterator*) adoptedTrans);
}
/**
 * Unregister the transliterator with the given UChar * ID.
 *
 * @param id       the ID to unregister
 * @param idLength the length of id; a negative value means NUL-terminated
 */
U_CAPI void U_EXPORT2
utrans_unregisterID(const UChar* id, int32_t idLength) {
    // wrap the caller's buffer in a read-only alias instead of copying it
    UnicodeString alias(idLength<0, id, idLength);

    Transliterator::unregister(alias);
}
U_CAPI void U_EXPORT2
utrans_unregister(const char* id) {
UnicodeString ID(id, ""); // use invariant converter
@ -343,6 +309,93 @@ utrans_getAvailableID(int32_t index,
return Transliterator::getAvailableID(index).extract(0, 0x7fffffff, buf, bufCapacity, "");
}
/* Transliterator UEnumeration ---------------------------------------------- */
/*
 * State of an enumeration over the available transliterator IDs.
 * Pointers to this struct are cast to and from UEnumeration *,
 * which is why the UEnumeration member comes first.
 */
typedef struct UTransEnumeration {
    UEnumeration uenum;     /* the embedded enumeration "base" */
    int32_t index;          /* index of the next ID to return */
    int32_t count;          /* number of IDs when opened or last reset */
} UTransEnumeration;
/**
 * UEnumeration count callback.
 *
 * @return the stored number of IDs, or 0 if pErrorCode is NULL
 *         or already indicates a failure
 */
static int32_t U_CALLCONV
utrans_enum_count(UEnumeration *uenum, UErrorCode *pErrorCode) {
    int32_t result=0;

    if(pErrorCode!=NULL && !U_FAILURE(*pErrorCode)) {
        const UTransEnumeration *ute=(UTransEnumeration *)uenum;
        result=ute->count;
    }
    return result;
}
/**
 * UEnumeration "unext" callback: return the next available transliterator ID.
 *
 * @param uenum        the enumeration, actually a UTransEnumeration
 * @param resultLength if not NULL, receives the length of the returned ID,
 *                     or 0 when the enumeration is exhausted
 * @param pErrorCode   in/out error code; nothing is done if it is NULL
 *                     or already indicates a failure
 * @return the next ID, or NULL at the end of the enumeration
 *         or when nothing was done because of pErrorCode
 */
static const UChar* U_CALLCONV
utrans_enum_unext(UEnumeration *uenum,
                  int32_t* resultLength,
                  UErrorCode *pErrorCode) {
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return NULL;
    }

    UTransEnumeration *ute=(UTransEnumeration *)uenum;
    int32_t index=ute->index;
    if(index<ute->count) {
        const UnicodeString &ID=Transliterator::getAvailableID(index);
        ute->index=index+1;
        if(resultLength!=NULL) {
            *resultLength=ID.length();
        }
        // Transliterator keeps its ID NUL-terminated
        return ID.getBuffer();
    }

    // end of the enumeration: the length is an integer, so report 0, not NULL
    if(resultLength!=NULL) {
        *resultLength=0;
    }
    return NULL;
}
/**
 * UEnumeration reset callback: restart the iteration at the first ID
 * and re-read the number of available IDs.
 * Does nothing if pErrorCode is NULL or already indicates a failure.
 */
static void U_CALLCONV
utrans_enum_reset(UEnumeration *uenum, UErrorCode *pErrorCode) {
    if(pErrorCode!=NULL && !U_FAILURE(*pErrorCode)) {
        UTransEnumeration *self=(UTransEnumeration *)uenum;

        self->count=Transliterator::countAvailableIDs();
        self->index=0;
    }
}
/**
 * UEnumeration close callback: release the enumeration object.
 */
static void U_CALLCONV
utrans_enum_close(UEnumeration *uenum) {
    void *memory=uenum;

    uprv_free(memory);
}
/*
 * Template that utrans_openIDs() copies into every new UTransEnumeration.
 * NOTE(review): the slot order must match struct UEnumeration, which is
 * presumably declared in uenumimp.h - confirm when changing this table.
 */
static const UEnumeration utransEnumeration={
NULL, /* unused here; presumably a context pointer - confirm against UEnumeration */
NULL, /* unused here; presumably a context pointer - confirm against UEnumeration */
utrans_enum_close,
utrans_enum_count,
utrans_enum_unext,
uenum_nextDefault, /* not defined in this file */
utrans_enum_reset
};
/**
 * Create a UEnumeration over the available transliterator IDs.
 *
 * @param pErrorCode in/out error code; set to U_MEMORY_ALLOCATION_ERROR
 *                   if the enumeration object cannot be allocated
 * @return the new enumeration, or NULL on failure or if pErrorCode is NULL
 *         or already indicates a failure
 */
U_CAPI UEnumeration * U_EXPORT2
utrans_openIDs(UErrorCode *pErrorCode) {
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return NULL;
    }

    UTransEnumeration *result=(UTransEnumeration *)uprv_malloc(sizeof(UTransEnumeration));
    if(result!=NULL) {
        // start from the shared callback table, then set the iteration state
        result->uenum=utransEnumeration;
        result->index=0;
        result->count=Transliterator::countAvailableIDs();
    } else {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    }
    return (UEnumeration *)result;
}
/********************************************************************
* Transliteration API
********************************************************************/

View file

@ -173,6 +173,71 @@ static void TestAPI() {
}
}
/*
 * Test the UChar * ID APIs:
 * enumerate all transliterator IDs with utrans_openIDs(), open the first few
 * of them with utrans_openU(), check that utrans_getUnicodeID() returns the
 * same ID, and verify that the number of enumerated IDs matches uenum_count()
 * after uenum_reset().
 */
static void TestUnicodeIDs() {
    UEnumeration *uenum;
    UTransliterator *utrans;
    const UChar *id, *id2;
    int32_t idLength, id2Length, count, count2;

    UErrorCode errorCode;

    errorCode=U_ZERO_ERROR;
    uenum=utrans_openIDs(&errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("utrans_openIDs() failed - %s\n", u_errorName(errorCode));
        return;
    }

    count=uenum_count(uenum, &errorCode);
    if(U_FAILURE(errorCode) || count<1) {
        log_err("uenum_count(transliterator IDs)=%d - %s\n", count, u_errorName(errorCode));
    }

    /* from here on, count is the number of IDs actually enumerated */
    count=0;
    for(;;) {
        id=uenum_unext(uenum, &idLength, &errorCode);
        if(U_FAILURE(errorCode)) {
            log_err("uenum_unext(transliterator ID %d) failed - %s\n", count, u_errorName(errorCode));
            break;
        }
        if(id==NULL) {
            break;
        }

        if(++count>5) {
            /* try to actually open only a few transliterators */
            continue;
        }

        utrans=utrans_openU(id, idLength, UTRANS_FORWARD, NULL, 0, NULL, &errorCode);
        if(U_FAILURE(errorCode)) {
            log_err("utrans_openU(%s) failed - %s\n", aescstrdup(id, idLength), u_errorName(errorCode));
            /* reset, or else the next uenum_unext() fails only because of this error */
            errorCode=U_ZERO_ERROR;
            continue;
        }

        id2=utrans_getUnicodeID(utrans, &id2Length);
        if(idLength!=id2Length || 0!=u_memcmp(id, id2, idLength)) {
            log_err("utrans_getUnicodeID(%s) does not match the original ID\n", aescstrdup(id, idLength));
        }

        utrans_close(utrans);
    }

    uenum_reset(uenum, &errorCode);
    if(U_FAILURE(errorCode) || count<1) {
        log_err("uenum_reset(transliterator IDs) failed - %s\n", u_errorName(errorCode));
    } else {
        count2=uenum_count(uenum, &errorCode);
        /* check the freshly retrieved count2; count>=1 is already known in this branch */
        if(U_FAILURE(errorCode) || count2<1) {
            log_err("2nd uenum_count(transliterator IDs)=%d - %s\n", count2, u_errorName(errorCode));
        } else if(count!=count2) {
            log_err("uenum_unext(transliterator IDs) returned %d IDs but uenum_count() after uenum_reset() claims there are %d\n", count, count2);
        }
    }

    uenum_close(uenum);
}
static void TestOpenInverse(){
UErrorCode status=U_ZERO_ERROR;
UTransliterator* t1=NULL;
@ -271,10 +336,13 @@ static void TestClone(){
static void TestRegisterUnregister(){
UErrorCode status=U_ZERO_ERROR;
UTransliterator* t1=NULL;
UTransliterator* rules=NULL;
UTransliterator* rules=NULL, *rules2;
UTransliterator* inverse1=NULL;
UChar rule[]={ 0x0061, 0x003c, 0x003e, 0x0063}; /*a<>b*/
U_STRING_DECL(ID, "TestA-TestB", 11);
U_STRING_INIT(ID, "TestA-TestB", 11);
/* Make sure it doesn't exist */
t1=utrans_open("TestA-TestB", UTRANS_FORWARD,NULL,0,NULL, &status);
if(t1 != NULL || U_SUCCESS(status)) {
@ -295,6 +363,14 @@ static void TestRegisterUnregister(){
log_err("FAIL: utrans_openRules(a<>B) failed with error=%s\n", myErrorName(status));
return;
}
/* clone it so we can register it a second time */
rules2=utrans_clone(rules, &status);
if(U_FAILURE(status)) {
log_err("FAIL: utrans_clone(a<>B) failed with error=%s\n", myErrorName(status));
return;
}
status=U_ZERO_ERROR;
/* Register it */
utrans_register(rules, &status);
@ -320,7 +396,34 @@ static void TestRegisterUnregister(){
log_err("FAIL: TestA-TestB isn't unregistered\n");
return;
}
utrans_close(t1);
/* now with utrans_unregisterID(const UChar *) */
status=U_ZERO_ERROR;
utrans_register(rules2, &status);
if(U_FAILURE(status)){
log_err("FAIL: 2nd utrans_register failed with error=%s\n", myErrorName(status));
return;
}
status=U_ZERO_ERROR;
/* Now check again -- should exist now*/
t1= utrans_open("TestA-TestB", UTRANS_FORWARD, NULL,0,NULL,&status);
if(U_FAILURE(status) || t1 == NULL){
log_err("FAIL: 2nd TestA-TestB not registered\n");
return;
}
utrans_close(t1);
/*unregister the instance*/
status=U_ZERO_ERROR;
utrans_unregisterID(ID, -1);
/* now Make sure it doesn't exist */
t1=utrans_openU(ID, -1, UTRANS_FORWARD,NULL,0,NULL, &status);
if(U_SUCCESS(status) || t1 != NULL) {
log_err("FAIL: 2nd TestA-TestB isn't unregistered\n");
return;
}
utrans_close(t1);
utrans_close(inverse1);
}
@ -376,7 +479,7 @@ static void TestFilter() {
UChar filt[128];
UChar buf[128];
UChar exp[128];
char cbuf[128];
char *cbuf;
int32_t limit;
const char* DATA[] = {
"[^c]", /* Filter out 'c' */
@ -420,9 +523,7 @@ static void TestFilter() {
goto exit;
}
/*u_austrcpy(cbuf, buf);*/
u_UCharsToChars(buf, cbuf, u_strlen(buf)+1);
/*u_uastrcpy(exp, DATA[i+2]);*/
cbuf=aescstrdup(buf, -1);
u_charsToUChars(DATA[i+2], exp, strlen(DATA[i+2])+1);
if (0 == u_strcmp(buf, exp)) {
log_verbose("Ok: %s | %s -> %s\n", DATA[i+1], DATA[i], cbuf);
@ -494,7 +595,10 @@ static void _expect(const UTransliterator* trans,
UChar from[CAP];
UChar to[CAP];
UChar buf[CAP];
char id[CAP];
const UChar *ID;
int32_t IDLength;
const char *id;
UErrorCode status = U_ZERO_ERROR;
int32_t limit;
UTransPosition pos;
@ -504,7 +608,8 @@ static void _expect(const UTransliterator* trans,
u_uastrcpy(from, cfrom);
u_uastrcpy(to, cto);
utrans_getID(trans, id, CAP);
ID = utrans_getUnicodeID(trans, &IDLength);
id = aescstrdup(ID, IDLength);
/* utrans_transUChars() */
u_strcpy(buf, from);