mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 05:55:35 +00:00
ICU-10585 Add utrans_toRules, utrans_getSourceSet
X-SVN-Rev: 34882
This commit is contained in:
parent
9cf4a2acd5
commit
df7d845995
3 changed files with 158 additions and 3 deletions
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1997-2011, International Business Machines
|
||||
* Copyright (C) 1997-2011,2014 International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* Date Name Description
|
||||
|
@ -19,6 +19,7 @@
|
|||
#include "unicode/urep.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include "unicode/uenum.h"
|
||||
#include "unicode/uset.h"
|
||||
|
||||
/********************************************************************
|
||||
* General Notes
|
||||
|
@ -508,6 +509,56 @@ utrans_transIncrementalUChars(const UTransliterator* trans,
|
|||
UTransPosition* pos,
|
||||
UErrorCode* status);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Create a rule string that can be passed to utrans_openU to recreate this
|
||||
* transliterator.
|
||||
*
|
||||
* @param trans The transliterator
|
||||
* @param escapeUnprintable if TRUE then convert unprintable characters to their
|
||||
* hex escape representations, \\uxxxx or \\Uxxxxxxxx.
|
||||
* Unprintable characters are those other than
|
||||
* U+000A, U+0020..U+007E.
|
||||
* @param result A pointer to a buffer to receive the rules.
|
||||
* @param resultLength The maximum size of result.
|
||||
* @param status A pointer to the UErrorCode. In case of error status, the
|
||||
* contents of result are undefined.
|
||||
* @return int32_t The length of the rule string (may be greater than resultLength,
|
||||
* in which case an error is returned).
|
||||
* @draft ICU 53
|
||||
*/
|
||||
U_DRAFT int32_t U_EXPORT2
|
||||
utrans_toRules( const UTransliterator* trans,
|
||||
UBool escapeUnprintable,
|
||||
UChar* result, int32_t resultLength,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Returns the set of all characters that may be modified in the input text by
|
||||
* this UTransliterator, optionally ignoring the transliterator's current filter.
|
||||
* @param trans The transliterator.
|
||||
* @param ignoreFilter If FALSE, the returned set incorporates the
|
||||
* UTransliterator's current filter; if the filter is changed,
|
||||
* the return value of this function will change. If TRUE, the
|
||||
* returned set ignores the effect of the UTransliterator's
|
||||
* current filter.
|
||||
* @param fillIn Pointer to a USet object to receive the modifiable characters
|
||||
* set. Previous contents of fillIn are lost. <em>If fillIn is
|
||||
* NULL, then a new USet is created and returned. The caller
|
||||
* owns the result and must dispose of it by calling uset_close.</em>
|
||||
* @param status A pointer to the UErrorCode.
|
||||
* @return USet* Either fillIn, or if fillIn is NULL, a pointer to a
|
||||
* newly-allocated USet that the user must close. In case of
|
||||
* error, NULL is returned.
|
||||
* @draft ICU 53
|
||||
*/
|
||||
U_DRAFT USet* U_EXPORT2
|
||||
utrans_getSourceSet(const UTransliterator* trans,
|
||||
UBool ignoreFilter,
|
||||
USet* fillIn,
|
||||
UErrorCode* status);
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
|
||||
/* deprecated API ----------------------------------------------------------- */
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1997-2009, International Business Machines
|
||||
* Copyright (C) 1997-2009,2014 International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* Date Name Description
|
||||
|
@ -20,6 +20,7 @@
|
|||
#include "unicode/uniset.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/uenum.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "uenumimp.h"
|
||||
#include "cpputils.h"
|
||||
#include "rbt.h"
|
||||
|
@ -492,4 +493,39 @@ utrans_transIncrementalUChars(const UTransliterator* trans,
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * C wrapper that writes the rule string of a transliterator into a
 * caller-supplied UChar buffer.
 *
 * @param trans             The transliterator; must not be NULL.
 * @param escapeUnprintable Passed through to Transliterator::toRules().
 * @param result            Destination buffer, or NULL together with
 *                          resultLength==0 for pure preflighting.
 * @param resultLength      Capacity of result in UChars; must be >= 0.
 * @param status            In/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR
 *                          when trans is NULL or the result/resultLength
 *                          pair is unusable.
 * @return The length reported by UnicodeString::extract(), or -1 when the
 *         function bails out before producing any rules.
 */
U_CAPI int32_t U_EXPORT2
utrans_toRules(     const UTransliterator* trans,
                    UBool escapeUnprintable,
                    UChar* result, int32_t resultLength,
                    UErrorCode* status) {
    // utrans_ENTRY is defined elsewhere in this file; presumably it returns
    // the value that follows it when status is NULL or already a failure.
    utrans_ENTRY(status) -1;

    // Fail fast on arguments that cannot work, instead of building the
    // rules first and only then discovering the destination is unusable.
    if (trans == NULL ||
        ((result == NULL) ? (resultLength != 0) : (resultLength < 0))) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return -1;
    }

    UnicodeString res;
    if (result != NULL) {
        // Alias the destination buffer so the rules are written in place.
        res.setTo(result, 0, resultLength);
    }
    // Otherwise (NULL destination, pure preflighting) res stays an empty
    // dummy string and extract() below only reports the required length.

    ((Transliterator*) trans)->toRules(res, escapeUnprintable);
    return res.extract(result, resultLength, *status);
}
|
||||
|
||||
/**
 * C wrapper that fills a USet with the source set of a transliterator.
 *
 * @param trans        The transliterator; must not be NULL.
 * @param ignoreFilter If TRUE, Transliterator::handleGetSourceSet() is used;
 *                     otherwise Transliterator::getSourceSet() is used.
 * @param fillIn       Set to receive the result, or NULL to have a new set
 *                     created with uset_openEmpty(); the caller then owns it
 *                     and must release it with uset_close().
 * @param status       In/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR when
 *                     trans is NULL and to U_MEMORY_ALLOCATION_ERROR when a
 *                     new set cannot be created.
 * @return fillIn, or the newly created set when fillIn was NULL. NULL when
 *         this function itself detects an error; a caller-supplied fillIn is
 *         left untouched in that case and remains owned by the caller.
 */
U_CAPI USet* U_EXPORT2
utrans_getSourceSet(const UTransliterator* trans,
                    UBool ignoreFilter,
                    USet* fillIn,
                    UErrorCode* status) {
    // utrans_ENTRY is defined elsewhere in this file; presumably it returns
    // the value that follows it when status is NULL or already a failure.
    utrans_ENTRY(status) fillIn;

    if (trans == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    if (fillIn == NULL) {
        fillIn = uset_openEmpty();
        if (fillIn == NULL) {
            // Do not dereference a set that could not be allocated.
            *status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
    }

    if (ignoreFilter) {
        ((Transliterator*) trans)->handleGetSourceSet(*((UnicodeSet*)fillIn));
    } else {
        ((Transliterator*) trans)->getSourceSet(*((UnicodeSet*)fillIn));
    }
    return fillIn;
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_TRANSLITERATION */
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1997-2009, International Business Machines
|
||||
* Copyright (C) 1997-2009,2014 International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* Date Name Description
|
||||
|
@ -16,6 +16,7 @@
|
|||
#include <string.h>
|
||||
#include "unicode/utrans.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "cintltst.h"
|
||||
|
||||
#define TEST(x) addTest(root, &x, "utrans/" # x)
|
||||
|
@ -28,6 +29,7 @@ static void TestClone(void);
|
|||
static void TestRegisterUnregister(void);
|
||||
static void TestExtractBetween(void);
|
||||
static void TestUnicodeIDs(void);
|
||||
static void TestGetRulesAndSourceSet(void);
|
||||
|
||||
static void _expectRules(const char*, const char*, const char*);
|
||||
static void _expect(const UTransliterator* trans, const char* cfrom, const char* cto);
|
||||
|
@ -45,6 +47,7 @@ addUTransTest(TestNode** root) {
|
|||
TEST(TestRegisterUnregister);
|
||||
TEST(TestExtractBetween);
|
||||
TEST(TestUnicodeIDs);
|
||||
TEST(TestGetRulesAndSourceSet);
|
||||
}
|
||||
|
||||
/*------------------------------------------------------------------
|
||||
|
@ -568,6 +571,71 @@ static void TestExtractBetween() {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test utrans_toRules, utrans_getSourceSet
|
||||
*/
|
||||
|
||||
/* A simple transform with a small filter & source set: rules 50-100 chars unescaped, 100-200 chars escaped,
|
||||
filter & source set 4-20 chars */
|
||||
static const UChar transSimpleID[] = { 0x79,0x6F,0x2D,0x79,0x6F,0x5F,0x42,0x4A,0 }; /* "yo-yo_BJ" */
|
||||
static const char* transSimpleCName = "yo-yo_BJ";
|
||||
|
||||
enum { kUBufMax = 256 };
|
||||
static void TestGetRulesAndSourceSet() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UTransliterator *utrans = utrans_openU(transSimpleID, -1, UTRANS_FORWARD, NULL, 0, NULL, &status);
|
||||
if ( U_SUCCESS(status) ) {
|
||||
USet* uset;
|
||||
UChar ubuf[kUBufMax];
|
||||
int32_t ulen;
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
ulen = utrans_toRules(utrans, FALSE, ubuf, kUBufMax, &status);
|
||||
if ( U_FAILURE(status) || ulen <= 50 || ulen >= 100) {
|
||||
log_err("FAIL: utrans_toRules unescaped, expected noErr and len 50-100, got error=%s and len=%d\n",
|
||||
u_errorName(status), ulen);
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
ulen = utrans_toRules(utrans, FALSE, NULL, 0, &status);
|
||||
if ( status != U_BUFFER_OVERFLOW_ERROR || ulen <= 50 || ulen >= 100) {
|
||||
log_err("FAIL: utrans_toRules unescaped, expected U_BUFFER_OVERFLOW_ERROR and len 50-100, got error=%s and len=%d\n",
|
||||
u_errorName(status), ulen);
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
ulen = utrans_toRules(utrans, TRUE, ubuf, kUBufMax, &status);
|
||||
if ( U_FAILURE(status) || ulen <= 100 || ulen >= 200) {
|
||||
log_err("FAIL: utrans_toRules escaped, expected noErr and len 100-200, got error=%s and len=%d\n",
|
||||
u_errorName(status), ulen);
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
uset = utrans_getSourceSet(utrans, FALSE, NULL, &status);
|
||||
ulen = uset_toPattern(uset, ubuf, kUBufMax, FALSE, &status);
|
||||
uset_close(uset);
|
||||
if ( U_FAILURE(status) || ulen <= 4 || ulen >= 20) {
|
||||
log_err("FAIL: utrans_getSourceSet useFilter, expected noErr and len 4-20, got error=%s and len=%d\n",
|
||||
u_errorName(status), ulen);
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
uset = utrans_getSourceSet(utrans, TRUE, NULL, &status);
|
||||
ulen = uset_toPattern(uset, ubuf, kUBufMax, FALSE, &status);
|
||||
uset_close(uset);
|
||||
if ( U_FAILURE(status) || ulen <= 4 || ulen >= 20) {
|
||||
log_err("FAIL: utrans_getSourceSet ignoreFilter, expected noErr and len 4-20, got error=%s and len=%d\n",
|
||||
u_errorName(status), ulen);
|
||||
}
|
||||
|
||||
utrans_close(utrans);
|
||||
} else {
|
||||
log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n",
|
||||
transSimpleCName, u_errorName(status));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void _expectRules(const char* crules,
|
||||
const char* cfrom,
|
||||
const char* cto) {
|
||||
|
|
Loading…
Add table
Reference in a new issue