From df7d84599515dc2ca27600ee3df64bb976b156ac Mon Sep 17 00:00:00 2001
From: Peter Edberg
Date: Mon, 13 Jan 2014 07:27:02 +0000
Subject: [PATCH] ICU-10585 Add utrans_toRules, utrans_getSourceSet

X-SVN-Rev: 34882
---
Review notes (text between "---" and the diffstat is not part of the commit
message):
 - utrans_getSourceSet now sets U_MEMORY_ALLOCATION_ERROR and returns NULL
   when uset_openEmpty() fails, instead of dereferencing a NULL set.
 - The test no longer passes a NULL set to uset_toPattern, and its failure
   message names utrans_openU, the function that is actually called.
 - Hunk sizes and the diffstat are updated for these changes; the commit id
   and the "index" blob ids still identify the original commit.
 - Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy, so whitespace may differ from the original.

 icu4c/source/i18n/unicode/utrans.h    | 55 ++++++++++++++++++++-
 icu4c/source/i18n/utrans.cpp          | 44 ++++++++++++++++-
 icu4c/source/test/cintltst/utransts.c | 72 ++++++++++++++++++++++++++-
 3 files changed, 168 insertions(+), 3 deletions(-)

diff --git a/icu4c/source/i18n/unicode/utrans.h b/icu4c/source/i18n/unicode/utrans.h
index 5c1b4136d61..c6f67da0342 100644
--- a/icu4c/source/i18n/unicode/utrans.h
+++ b/icu4c/source/i18n/unicode/utrans.h
@@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-*   Copyright (C) 1997-2011, International Business Machines
+*   Copyright (C) 1997-2011,2014 International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *******************************************************************************
 *   Date        Name        Description
@@ -19,6 +19,7 @@
 #include "unicode/urep.h"
 #include "unicode/parseerr.h"
 #include "unicode/uenum.h"
+#include "unicode/uset.h"
 
 /********************************************************************
  * General Notes
@@ -508,6 +509,58 @@ utrans_transIncrementalUChars(const UTransliterator* trans,
                               UTransPosition* pos,
                               UErrorCode* status);
 
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Create a rule string that can be passed to utrans_openU to recreate this
+ * transliterator.
+ *
+ * @param trans     The transliterator
+ * @param escapeUnprintable if TRUE then convert unprintable characters to their
+ *                  hex escape representations, \\uxxxx or \\Uxxxxxxxx.
+ *                  Unprintable characters are those other than
+ *                  U+000A, U+0020..U+007E.
+ * @param result    A pointer to a buffer to receive the rules. May be NULL
+ *                  when resultLength is 0 (pure preflighting).
+ * @param resultLength The maximum size of result.
+ * @param status    A pointer to the UErrorCode. In case of error status, the
+ *                  contents of result are undefined.
+ * @return int32_t   The length of the rule string (may be greater than resultLength,
+ *                  in which case status is set to U_BUFFER_OVERFLOW_ERROR).
+ * @draft ICU 53
+ */
+U_DRAFT int32_t U_EXPORT2
+utrans_toRules(     const UTransliterator* trans,
+                    UBool escapeUnprintable,
+                    UChar* result, int32_t resultLength,
+                    UErrorCode* status);
+
+/**
+ * Returns the set of all characters that may be modified in the input text by
+ * this UTransliterator, optionally ignoring the transliterator's current filter.
+ * @param trans     The transliterator.
+ * @param ignoreFilter If FALSE, the returned set incorporates the
+ *                  UTransliterator's current filter; if the filter is changed,
+ *                  the return value of this function will change. If TRUE, the
+ *                  returned set ignores the effect of the UTransliterator's
+ *                  current filter.
+ * @param fillIn    Pointer to a USet object to receive the modifiable characters
+ *                  set. Previous contents of fillIn are lost. If fillIn is
+ *                  NULL, then a new USet is created and returned. The caller
+ *                  owns the result and must dispose of it by calling uset_close.
+ * @param status    A pointer to the UErrorCode.
+ * @return USet*     Either fillIn, or if fillIn is NULL, a pointer to a
+ *                  newly-allocated USet that the user must close. In case of
+ *                  error no new USet is created, so NULL is returned when
+ *                  fillIn is NULL.
+ * @draft ICU 53
+ */
+U_DRAFT USet* U_EXPORT2
+utrans_getSourceSet(const UTransliterator* trans,
+                    UBool ignoreFilter,
+                    USet* fillIn,
+                    UErrorCode* status);
+#endif  /* U_HIDE_DRAFT_API */
+
 /* deprecated API ----------------------------------------------------------- */
 
 #ifndef U_HIDE_DEPRECATED_API
diff --git a/icu4c/source/i18n/utrans.cpp b/icu4c/source/i18n/utrans.cpp
index 16a09bdcb6a..0d614a0c7df 100644
--- a/icu4c/source/i18n/utrans.cpp
+++ b/icu4c/source/i18n/utrans.cpp
@@ -1,6 +1,6 @@
 /*
  *******************************************************************************
- *   Copyright (C) 1997-2009, International Business Machines
+ *   Copyright (C) 1997-2009,2014 International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *******************************************************************************
  *   Date        Name        Description
@@ -20,6 +20,7 @@
 #include "unicode/uniset.h"
 #include "unicode/ustring.h"
 #include "unicode/uenum.h"
+#include "unicode/uset.h"
 #include "uenumimp.h"
 #include "cpputils.h"
 #include "rbt.h"
@@ -492,4 +493,45 @@ utrans_transIncrementalUChars(const UTransliterator* trans,
     }
 }
 
+U_CAPI int32_t U_EXPORT2
+utrans_toRules(     const UTransliterator* trans,
+                    UBool escapeUnprintable,
+                    UChar* result, int32_t resultLength,
+                    UErrorCode* status) {
+    utrans_ENTRY(status) -1;
+
+    UnicodeString res;
+    if (!(result==NULL && resultLength==0)) {
+        // NULL destination for pure preflighting: empty dummy string
+        // otherwise, alias the destination buffer
+        res.setTo(result, 0, resultLength);
+    }
+    ((Transliterator*) trans)->toRules(res, escapeUnprintable);
+    return res.extract(result, resultLength, *status);
+}
+
+U_CAPI USet* U_EXPORT2
+utrans_getSourceSet(const UTransliterator* trans,
+                    UBool ignoreFilter,
+                    USet* fillIn,
+                    UErrorCode* status) {
+    utrans_ENTRY(status) fillIn;
+
+    if (fillIn == NULL) {
+        fillIn = uset_openEmpty();
+        if (fillIn == NULL) {
+            // uset_openEmpty() could not allocate a set; report that instead
+            // of dereferencing a NULL pointer below
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            return NULL;
+        }
+    }
+    if (ignoreFilter) {
+        ((Transliterator*) trans)->handleGetSourceSet(*((UnicodeSet*)fillIn));
+    } else {
+        ((Transliterator*) trans)->getSourceSet(*((UnicodeSet*)fillIn));
+    }
+    return fillIn;
+}
+
 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
diff --git a/icu4c/source/test/cintltst/utransts.c b/icu4c/source/test/cintltst/utransts.c
index 440b915e819..91c777f36ce 100644
--- a/icu4c/source/test/cintltst/utransts.c
+++ b/icu4c/source/test/cintltst/utransts.c
@@ -1,6 +1,6 @@
 /*
  *******************************************************************************
- *   Copyright (C) 1997-2009, International Business Machines
+ *   Copyright (C) 1997-2009,2014 International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *******************************************************************************
  *   Date        Name        Description
@@ -16,6 +16,7 @@
 #include <string.h>
 #include "unicode/utrans.h"
 #include "unicode/ustring.h"
+#include "unicode/uset.h"
 #include "cintltst.h"
 
 #define TEST(x) addTest(root, &x, "utrans/" # x)
@@ -28,6 +29,7 @@ static void TestClone(void);
 static void TestRegisterUnregister(void);
 static void TestExtractBetween(void);
 static void TestUnicodeIDs(void);
+static void TestGetRulesAndSourceSet(void);
 
 static void _expectRules(const char*, const char*, const char*);
 static void _expect(const UTransliterator* trans, const char* cfrom, const char* cto);
@@ -45,6 +47,7 @@ addUTransTest(TestNode** root) {
     TEST(TestRegisterUnregister);
     TEST(TestExtractBetween);
     TEST(TestUnicodeIDs);
+    TEST(TestGetRulesAndSourceSet);
 }
 
 /*------------------------------------------------------------------
@@ -568,6 +571,73 @@ static void TestExtractBetween() {
     }
 }
 
+/**
+ * Test utrans_toRules, utrans_getSourceSet
+ */
+
+/* A simple transform with a small filter & source set: rules 50-100 chars unescaped, 100-200 chars escaped,
+   filter & source set 4-20 chars */
+static const UChar transSimpleID[] = { 0x79,0x6F,0x2D,0x79,0x6F,0x5F,0x42,0x4A,0 }; /* "yo-yo_BJ" */
+static const char* transSimpleCName = "yo-yo_BJ";
+
+enum { kUBufMax = 256 };
+static void TestGetRulesAndSourceSet() {
+    UErrorCode status = U_ZERO_ERROR;
+    UTransliterator *utrans = utrans_openU(transSimpleID, -1, UTRANS_FORWARD, NULL, 0, NULL, &status);
+    if ( U_SUCCESS(status) ) {
+        USet* uset;
+        UChar ubuf[kUBufMax];
+        int32_t ulen;
+
+        status = U_ZERO_ERROR;
+        ulen = utrans_toRules(utrans, FALSE, ubuf, kUBufMax, &status);
+        if ( U_FAILURE(status) || ulen <= 50 || ulen >= 100) {
+            log_err("FAIL: utrans_toRules unescaped, expected noErr and len 50-100, got error=%s and len=%d\n",
+                    u_errorName(status), ulen);
+        }
+
+        status = U_ZERO_ERROR;
+        ulen = utrans_toRules(utrans, FALSE, NULL, 0, &status);
+        if ( status != U_BUFFER_OVERFLOW_ERROR || ulen <= 50 || ulen >= 100) {
+            log_err("FAIL: utrans_toRules unescaped, expected U_BUFFER_OVERFLOW_ERROR and len 50-100, got error=%s and len=%d\n",
+                    u_errorName(status), ulen);
+        }
+
+        status = U_ZERO_ERROR;
+        ulen = utrans_toRules(utrans, TRUE, ubuf, kUBufMax, &status);
+        if ( U_FAILURE(status) || ulen <= 100 || ulen >= 200) {
+            log_err("FAIL: utrans_toRules escaped, expected noErr and len 100-200, got error=%s and len=%d\n",
+                    u_errorName(status), ulen);
+        }
+
+        status = U_ZERO_ERROR;
+        uset = utrans_getSourceSet(utrans, FALSE, NULL, &status);
+        /* do not hand a NULL set to uset_toPattern if utrans_getSourceSet failed */
+        ulen = (U_SUCCESS(status) && uset != NULL)? uset_toPattern(uset, ubuf, kUBufMax, FALSE, &status): 0;
+        uset_close(uset);
+        if ( U_FAILURE(status) || ulen <= 4 || ulen >= 20) {
+            log_err("FAIL: utrans_getSourceSet useFilter, expected noErr and len 4-20, got error=%s and len=%d\n",
+                    u_errorName(status), ulen);
+        }
+
+        status = U_ZERO_ERROR;
+        uset = utrans_getSourceSet(utrans, TRUE, NULL, &status);
+        /* do not hand a NULL set to uset_toPattern if utrans_getSourceSet failed */
+        ulen = (U_SUCCESS(status) && uset != NULL)? uset_toPattern(uset, ubuf, kUBufMax, FALSE, &status): 0;
+        uset_close(uset);
+        if ( U_FAILURE(status) || ulen <= 4 || ulen >= 20) {
+            log_err("FAIL: utrans_getSourceSet ignoreFilter, expected noErr and len 4-20, got error=%s and len=%d\n",
+                    u_errorName(status), ulen);
+        }
+
+        utrans_close(utrans);
+    } else {
+        log_data_err("FAIL: utrans_openU(%s) failed, error=%s (Are you missing data?)\n",
+                     transSimpleCName, u_errorName(status));
+    }
+}
+
+
 static void _expectRules(const char* crules,
                          const char* cfrom,
                          const char* cto) {