ICU-10585 Add utrans_toRules, utrans_getSourceSet

X-SVN-Rev: 34882
This commit is contained in:
Peter Edberg 2014-01-13 07:27:02 +00:00
parent 9cf4a2acd5
commit df7d845995
3 changed files with 158 additions and 3 deletions

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 1997-2011, International Business Machines
* Copyright (C) 1997-2011,2014 International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* Date Name Description
@ -19,6 +19,7 @@
#include "unicode/urep.h"
#include "unicode/parseerr.h"
#include "unicode/uenum.h"
#include "unicode/uset.h"
/********************************************************************
* General Notes
@ -508,6 +509,56 @@ utrans_transIncrementalUChars(const UTransliterator* trans,
UTransPosition* pos,
UErrorCode* status);
#ifndef U_HIDE_DRAFT_API
/**
* Create a rule string that can be passed to utrans_openU to recreate this
* transliterator.
*
* To preflight (obtain the required length without writing any output), pass
* NULL for result and 0 for resultLength; the length of the rule string is
* returned and status is set to U_BUFFER_OVERFLOW_ERROR.
*
* @param trans The transliterator
* @param escapeUnprintable if TRUE then convert unprintable characters to their
* hex escape representations, \\uxxxx or \\Uxxxxxxxx.
* Unprintable characters are those other than
* U+000A, U+0020..U+007E.
* @param result A pointer to a buffer to receive the rules. May be NULL
* only if resultLength is 0 (preflighting).
* @param resultLength The capacity of result, in UChars.
* @param status A pointer to the UErrorCode. In case of error status, the
* contents of result are undefined.
* @return int32_t The length of the rule string (may be greater than resultLength,
* in which case an error is returned in status).
* @draft ICU 53
*/
U_DRAFT int32_t U_EXPORT2
utrans_toRules( const UTransliterator* trans,
UBool escapeUnprintable,
UChar* result, int32_t resultLength,
UErrorCode* status);
/**
* Returns the set of all characters that may be modified in the input text by
* this UTransliterator, optionally ignoring the transliterator's current filter.
* @param trans The transliterator.
* @param ignoreFilter If FALSE, the returned set incorporates the
* UTransliterator's current filter; if the filter is changed,
* the return value of this function will change. If TRUE, the
* returned set ignores the effect of the UTransliterator's
* current filter.
* @param fillIn Pointer to a USet object to receive the modifiable characters
* set. Previous contents of fillIn are lost. <em>If fillIn is
* NULL, then a new USet is created and returned. The caller
* owns the result and must dispose of it by calling uset_close.</em>
* @param status A pointer to the UErrorCode.
* @return USet* Either fillIn, or if fillIn is NULL, a pointer to a
* newly-allocated USet that the user must close. In case of
* error, NULL is returned.
* NOTE(review): when status already indicates a failure on
* entry, the implementation appears to return fillIn itself
* rather than NULL - confirm, and check status rather than
* relying on the return value alone.
* @draft ICU 53
*/
U_DRAFT USet* U_EXPORT2
utrans_getSourceSet(const UTransliterator* trans,
UBool ignoreFilter,
USet* fillIn,
UErrorCode* status);
#endif /* U_HIDE_DRAFT_API */
/* deprecated API ----------------------------------------------------------- */
#ifndef U_HIDE_DEPRECATED_API

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 1997-2009, International Business Machines
* Copyright (C) 1997-2009,2014 International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* Date Name Description
@ -20,6 +20,7 @@
#include "unicode/uniset.h"
#include "unicode/ustring.h"
#include "unicode/uenum.h"
#include "unicode/uset.h"
#include "uenumimp.h"
#include "cpputils.h"
#include "rbt.h"
@ -492,4 +493,39 @@ utrans_transIncrementalUChars(const UTransliterator* trans,
}
}
/**
 * C wrapper around Transliterator::toRules: writes the transliterator's rule
 * string into a caller-supplied UChar buffer.
 *
 * @param trans             The transliterator; must not be NULL.
 * @param escapeUnprintable Passed through to Transliterator::toRules.
 * @param result            Destination buffer, or NULL together with
 *                          resultLength==0 for pure preflighting.
 * @param resultLength      Capacity of result in UChars; must not be negative.
 * @param status            In/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR
 *                          when trans is NULL, resultLength is negative, or
 *                          result is NULL with a non-zero resultLength.
 * @return The value returned by UnicodeString::extract for the rule string,
 *         or -1 when the entry check or the argument validation fails.
 */
U_CAPI int32_t U_EXPORT2
utrans_toRules(     const UTransliterator* trans,
                    UBool escapeUnprintable,
                    UChar* result, int32_t resultLength,
                    UErrorCode* status) {
    utrans_ENTRY(status) -1;

    // Reject unusable arguments before doing any work, instead of
    // dereferencing a NULL transliterator or building the rules into a
    // string that aliases an invalid destination.
    if (trans == NULL || resultLength < 0 || (result == NULL && resultLength != 0)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return -1;
    }

    UnicodeString res;
    if (result != NULL) {
        // Alias the destination buffer so the rules are built in place when
        // they fit. With a NULL destination (pure preflighting) res stays an
        // empty dummy string.
        res.setTo(result, 0, resultLength);
    }
    ((Transliterator*) trans)->toRules(res, escapeUnprintable);
    return res.extract(result, resultLength, *status);
}
/**
 * C wrapper that stores a transliterator's source set into a USet.
 *
 * @param trans        The transliterator; must not be NULL.
 * @param ignoreFilter If TRUE, Transliterator::handleGetSourceSet is used;
 *                     otherwise Transliterator::getSourceSet is used.
 * @param fillIn       Set to receive the result, or NULL to have a new set
 *                     created with uset_openEmpty (owned by the caller).
 * @param status       In/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR when
 *                     trans is NULL and to U_MEMORY_ALLOCATION_ERROR when a
 *                     new set cannot be created.
 * @return fillIn, or the newly created set when fillIn was NULL. When the
 *         entry check or the trans check fails, fillIn is returned untouched
 *         (so NULL if the caller passed NULL); NULL is returned when a new
 *         set could not be created.
 */
U_CAPI USet* U_EXPORT2
utrans_getSourceSet(const UTransliterator* trans,
                    UBool ignoreFilter,
                    USet* fillIn,
                    UErrorCode* status) {
    utrans_ENTRY(status) fillIn;

    // Validate before allocating so that nothing needs to be released on the
    // error path.
    if (trans == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return fillIn;
    }

    if (fillIn == NULL) {
        fillIn = uset_openEmpty();
        if (fillIn == NULL) {
            // Creating the set failed; do not dereference a NULL set below.
            *status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
    }
    if (ignoreFilter) {
        ((Transliterator*) trans)->handleGetSourceSet(*((UnicodeSet*)fillIn));
    } else {
        ((Transliterator*) trans)->getSourceSet(*((UnicodeSet*)fillIn));
    }
    return fillIn;
}
#endif /* #if !UCONFIG_NO_TRANSLITERATION */

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 1997-2009, International Business Machines
* Copyright (C) 1997-2009,2014 International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* Date Name Description
@ -16,6 +16,7 @@
#include <string.h>
#include "unicode/utrans.h"
#include "unicode/ustring.h"
#include "unicode/uset.h"
#include "cintltst.h"
#define TEST(x) addTest(root, &x, "utrans/" # x)
@ -28,6 +29,7 @@ static void TestClone(void);
static void TestRegisterUnregister(void);
static void TestExtractBetween(void);
static void TestUnicodeIDs(void);
static void TestGetRulesAndSourceSet(void);
static void _expectRules(const char*, const char*, const char*);
static void _expect(const UTransliterator* trans, const char* cfrom, const char* cto);
@ -45,6 +47,7 @@ addUTransTest(TestNode** root) {
TEST(TestRegisterUnregister);
TEST(TestExtractBetween);
TEST(TestUnicodeIDs);
TEST(TestGetRulesAndSourceSet);
}
/*------------------------------------------------------------------
@ -568,6 +571,71 @@ static void TestExtractBetween() {
}
}
/**
* Test utrans_toRules, utrans_getSourceSet
*/
/* A simple transform with a small filter & source set: rules 50-100 chars unescaped, 100-200 chars escaped,
filter & source set 4-20 chars */
static const UChar transSimpleID[] = { 0x79,0x6F,0x2D,0x79,0x6F,0x5F,0x42,0x4A,0 }; /* "yo-yo_BJ" */
static const char* transSimpleCName = "yo-yo_BJ";
enum { kUBufMax = 256 };
static void TestGetRulesAndSourceSet() {
UErrorCode status = U_ZERO_ERROR;
UTransliterator *utrans = utrans_openU(transSimpleID, -1, UTRANS_FORWARD, NULL, 0, NULL, &status);
if ( U_SUCCESS(status) ) {
USet* uset;
UChar ubuf[kUBufMax];
int32_t ulen;
status = U_ZERO_ERROR;
ulen = utrans_toRules(utrans, FALSE, ubuf, kUBufMax, &status);
if ( U_FAILURE(status) || ulen <= 50 || ulen >= 100) {
log_err("FAIL: utrans_toRules unescaped, expected noErr and len 50-100, got error=%s and len=%d\n",
u_errorName(status), ulen);
}
status = U_ZERO_ERROR;
ulen = utrans_toRules(utrans, FALSE, NULL, 0, &status);
if ( status != U_BUFFER_OVERFLOW_ERROR || ulen <= 50 || ulen >= 100) {
log_err("FAIL: utrans_toRules unescaped, expected U_BUFFER_OVERFLOW_ERROR and len 50-100, got error=%s and len=%d\n",
u_errorName(status), ulen);
}
status = U_ZERO_ERROR;
ulen = utrans_toRules(utrans, TRUE, ubuf, kUBufMax, &status);
if ( U_FAILURE(status) || ulen <= 100 || ulen >= 200) {
log_err("FAIL: utrans_toRules escaped, expected noErr and len 100-200, got error=%s and len=%d\n",
u_errorName(status), ulen);
}
status = U_ZERO_ERROR;
uset = utrans_getSourceSet(utrans, FALSE, NULL, &status);
ulen = uset_toPattern(uset, ubuf, kUBufMax, FALSE, &status);
uset_close(uset);
if ( U_FAILURE(status) || ulen <= 4 || ulen >= 20) {
log_err("FAIL: utrans_getSourceSet useFilter, expected noErr and len 4-20, got error=%s and len=%d\n",
u_errorName(status), ulen);
}
status = U_ZERO_ERROR;
uset = utrans_getSourceSet(utrans, TRUE, NULL, &status);
ulen = uset_toPattern(uset, ubuf, kUBufMax, FALSE, &status);
uset_close(uset);
if ( U_FAILURE(status) || ulen <= 4 || ulen >= 20) {
log_err("FAIL: utrans_getSourceSet ignoreFilter, expected noErr and len 4-20, got error=%s and len=%d\n",
u_errorName(status), ulen);
}
utrans_close(utrans);
} else {
log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n",
transSimpleCName, u_errorName(status));
}
}
static void _expectRules(const char* crules,
const char* cfrom,
const char* cto) {