mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-1130 use perl \N{name} notation in Any-Name and Name-Any
X-SVN-Rev: 9849
This commit is contained in:
parent
c8f160bcab
commit
85d23479b6
5 changed files with 143 additions and 127 deletions
|
@ -10,11 +10,11 @@
|
|||
|
||||
#include "unicode/unifilt.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "name2uni.h"
|
||||
#include "cmemory.h"
|
||||
#include "uprops.h"
|
||||
|
||||
// As of Unicode 3.0.0, the longest name is 83 characters long.
|
||||
#define LONGEST_NAME 83
|
||||
#include "util.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -22,25 +22,17 @@ const char NameUnicodeTransliterator::fgClassID=0;
|
|||
|
||||
const char NameUnicodeTransliterator::_ID[] = "Name-Any";
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
*/
|
||||
NameUnicodeTransliterator::NameUnicodeTransliterator(
|
||||
UChar32 openDelim, UChar32 closeDelim,
|
||||
UnicodeFilter* adoptedFilter) :
|
||||
Transliterator(_ID, adoptedFilter),
|
||||
openDelimiter(openDelim),
|
||||
closeDelimiter(closeDelim) {
|
||||
}
|
||||
static const UChar OPEN[] = {92,78,126,123,126,0}; // "\N~{~"
|
||||
static const UChar OPEN_DELIM = 92; // '\\' first char of OPEN
|
||||
static const UChar CLOSE_DELIM = 125; // '}'
|
||||
static const UChar SPACE = 32; // ' '
|
||||
|
||||
/**
|
||||
* Constructs a transliterator that recognizes the Perl-style
|
||||
* \N{name} notation.
|
||||
*/
|
||||
NameUnicodeTransliterator::NameUnicodeTransliterator(UnicodeFilter* adoptedFilter) :
|
||||
Transliterator(_ID, adoptedFilter),
|
||||
openDelimiter((UChar) 0x007B /*{*/),
|
||||
closeDelimiter((UChar) 0x007D /*}*/) {
|
||||
Transliterator(_ID, adoptedFilter) {
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -52,9 +44,7 @@ NameUnicodeTransliterator::~NameUnicodeTransliterator() {}
|
|||
* Copy constructor.
|
||||
*/
|
||||
NameUnicodeTransliterator::NameUnicodeTransliterator(const NameUnicodeTransliterator& o) :
|
||||
Transliterator(o),
|
||||
openDelimiter(o.openDelimiter),
|
||||
closeDelimiter(o.closeDelimiter) {}
|
||||
Transliterator(o) {}
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
|
@ -62,8 +52,6 @@ NameUnicodeTransliterator::NameUnicodeTransliterator(const NameUnicodeTransliter
|
|||
NameUnicodeTransliterator& NameUnicodeTransliterator::operator=(
|
||||
const NameUnicodeTransliterator& o) {
|
||||
Transliterator::operator=(o);
|
||||
openDelimiter = o.openDelimiter;
|
||||
closeDelimiter = o.closeDelimiter;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -79,13 +67,30 @@ Transliterator* NameUnicodeTransliterator::clone(void) const {
|
|||
*/
|
||||
void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
|
||||
UBool isIncremental) const {
|
||||
// Accommodate the longest possible name plus padding
|
||||
UChar buf[LONGEST_NAME + 8];
|
||||
char cbuf[LONGEST_NAME + 8]; // Default converter
|
||||
// The failure mode, here and below, is to behave like Any-Null,
|
||||
// if either there is no name data (max len == 0) or there is no
|
||||
// memory (malloc() => NULL).
|
||||
|
||||
// The only characters used in names are (as of Unicode 3.0.0):
|
||||
// -0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
// (first character is a space).
|
||||
int32_t maxLen = uprv_getMaxCharNameLength();
|
||||
if (maxLen == 0) {
|
||||
offsets.start = offsets.limit;
|
||||
return;
|
||||
}
|
||||
|
||||
// Accommodate the longest possible name
|
||||
++maxLen; // allow for temporary trailing space
|
||||
char* cbuf = (char*) uprv_malloc(maxLen);
|
||||
if (cbuf == NULL) {
|
||||
offsets.start = offsets.limit;
|
||||
return;
|
||||
}
|
||||
|
||||
UnicodeString openPat(TRUE, OPEN, -1);
|
||||
UnicodeString str, name;
|
||||
|
||||
// Get the legal character set
|
||||
UnicodeSet legal;
|
||||
uprv_getCharNameCharacters((USet*) &legal); // USet* == UnicodeSet*
|
||||
|
||||
int32_t cursor = offsets.start;
|
||||
int32_t limit = offsets.limit;
|
||||
|
@ -94,63 +99,77 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos
|
|||
// 0 - looking for open delimiter
|
||||
// 1 - after open delimiter
|
||||
int32_t mode = 0;
|
||||
int32_t ibuf = 0;
|
||||
int32_t openPos = offsets.start; // position of openDelimiter
|
||||
|
||||
UnicodeString str;
|
||||
int32_t openPos = -1; // open delim candidate pos
|
||||
|
||||
UChar32 c;
|
||||
for (; cursor < limit; cursor+=UTF_CHAR_LENGTH(c)) {
|
||||
while (cursor < limit) {
|
||||
c = text.char32At(cursor);
|
||||
|
||||
switch (mode) {
|
||||
case 0: // looking for open delimiter
|
||||
if (c == openDelimiter) {
|
||||
if (c == OPEN_DELIM) { // quick check first
|
||||
openPos = cursor;
|
||||
mode = 1;
|
||||
ibuf = 0;
|
||||
int32_t i =
|
||||
ICU_Utility::parsePattern(openPat, text, cursor, limit);
|
||||
if (i >= 0 && i < limit) {
|
||||
mode = 1;
|
||||
name.truncate(0);
|
||||
cursor = i;
|
||||
continue; // *** reprocess char32At(cursor)
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 1: // after open delimiter
|
||||
// Look for [-a-zA-Z0-9<>]. If \s+ is found, convert it
|
||||
// Look for legal chars. If \s+ is found, convert it
|
||||
// to a single space. If closeDelimiter is found, exit
|
||||
// the loop. If any other character is found, exit the
|
||||
// loop. If the limit is found, exit the loop.
|
||||
// loop. If the limit is reached, exit the loop.
|
||||
|
||||
// Convert \s+ => SPACE. This assumes there are no
|
||||
// runs of >1 space characters in names.
|
||||
if (uprv_isRuleWhiteSpace(c)) {
|
||||
// Ignore leading whitespace
|
||||
if (ibuf != 0 && buf[ibuf-1] != (UChar)0x0020) {
|
||||
buf[ibuf++] = (UChar)0x0020 /* */;
|
||||
// If we go a bit past the longest possible name then abort
|
||||
if (ibuf == (LONGEST_NAME + 4)) {
|
||||
if (name.length() > 0 &&
|
||||
name.charAt(name.length()-1) != SPACE) {
|
||||
name.append(SPACE);
|
||||
// If we are too long then abort. maxLen includes
|
||||
// temporary trailing space, so use '>'.
|
||||
if (name.length() > maxLen) {
|
||||
mode = 0;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
|
||||
if (c == closeDelimiter) {
|
||||
if (c == CLOSE_DELIM) {
|
||||
|
||||
int32_t len = name.length();
|
||||
|
||||
// Delete trailing space, if any
|
||||
if (ibuf > 0 && buf[ibuf-1] == (UChar)0x0020) {
|
||||
--ibuf;
|
||||
if (len > 0 &&
|
||||
name.charAt(len-1) == SPACE) {
|
||||
--len;
|
||||
}
|
||||
buf[ibuf] = 0; // Add terminating zero
|
||||
|
||||
name.extract(0, len, cbuf, "");
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
UChar32 ch;
|
||||
|
||||
u_UCharsToChars(buf, cbuf, ibuf+1);
|
||||
ch = u_charFromName(U_EXTENDED_CHAR_NAME, cbuf, &status);
|
||||
c = u_charFromName(U_EXTENDED_CHAR_NAME, cbuf, &status);
|
||||
if (U_SUCCESS(status)) {
|
||||
// Lookup succeeded
|
||||
|
||||
// assert(UTF_CHAR_LENGTH(CLOSE_DELIM) == 1);
|
||||
cursor++; // advance over CLOSE_DELIM
|
||||
|
||||
str.truncate(0);
|
||||
str.append(ch);
|
||||
text.handleReplaceBetween(openPos, cursor+1, str);
|
||||
str.append(c);
|
||||
text.handleReplaceBetween(openPos, cursor, str);
|
||||
|
||||
// Adjust indices for the change in the length of
|
||||
// the string. Do not assume that str.length() ==
|
||||
// 1, in case of surrogates.
|
||||
int32_t delta = cursor + 1 - openPos - str.length();
|
||||
int32_t delta = cursor - openPos - str.length();
|
||||
cursor -= delta;
|
||||
limit -= delta;
|
||||
// assert(cursor == openPos + str.length());
|
||||
|
@ -158,18 +177,18 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos
|
|||
// If the lookup failed, we leave things as-is and
|
||||
// still switch to mode 0 and continue.
|
||||
mode = 0;
|
||||
continue;
|
||||
openPos = -1; // close off candidate
|
||||
continue; // *** reprocess char32At(cursor)
|
||||
}
|
||||
|
||||
// Check if c =~ [-A-Za-z0-9<> ]
|
||||
if (c == (UChar)0x002D ||
|
||||
(c >= (UChar)0x0041 && c <= (UChar)0x005A) ||
|
||||
(c >= (UChar)0x0061 && c <= (UChar)0x007A) ||
|
||||
(c >= (UChar)0x0030 && c <= (UChar)0x0039) ||
|
||||
c == (UChar)0x003C || c == (UChar)0x003E) {
|
||||
buf[ibuf++] = (char) c;
|
||||
// If we go a bit past the longest possible name then abort
|
||||
if (ibuf == (LONGEST_NAME + 4)) {
|
||||
// Check if c is a legal char. We assume here that
|
||||
// legal.contains(OPEN_DELIM) is FALSE, so when we abort a
|
||||
// name, we don't have to go back to openPos+1.
|
||||
if (legal.contains(c)) {
|
||||
name.append(c);
|
||||
// If we go past the longest possible name then abort.
|
||||
// maxLen includes temporary trailing space, so use '>='.
|
||||
if (name.length() >= maxLen) {
|
||||
mode = 0;
|
||||
}
|
||||
}
|
||||
|
@ -182,13 +201,17 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos
|
|||
|
||||
break;
|
||||
}
|
||||
|
||||
cursor += UTF_CHAR_LENGTH(c);
|
||||
}
|
||||
|
||||
offsets.contextLimit += limit - offsets.limit;
|
||||
offsets.limit = limit;
|
||||
// In incremental mode, only advance the cursor up to the last
|
||||
// open delimiter, if we are in mode 1.
|
||||
offsets.start = (mode == 1 && isIncremental) ? openPos : cursor;
|
||||
// open delimiter candidate.
|
||||
offsets.start = (isIncremental && openPos >= 0) ? openPos : cursor;
|
||||
|
||||
uprv_free(cbuf);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -16,14 +16,12 @@ U_NAMESPACE_BEGIN
|
|||
|
||||
/**
|
||||
* A transliterator that performs name to character mapping.
|
||||
* It recognizes the Perl syntax \N{name}.
|
||||
* @author Alan Liu
|
||||
* @draft ICU 2.0
|
||||
*/
|
||||
class U_I18N_API NameUnicodeTransliterator : public Transliterator {
|
||||
|
||||
UChar32 openDelimiter;
|
||||
UChar32 closeDelimiter;
|
||||
|
||||
/**
|
||||
* The address of this static class variable serves as this class's ID
|
||||
* for ICU "poor man's RTTI".
|
||||
|
@ -34,17 +32,6 @@ class U_I18N_API NameUnicodeTransliterator : public Transliterator {
|
|||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
* @param openDelimiter the open delimiter character.
|
||||
* @param closeDelimiter the close delimiter character.
|
||||
* @param adoptedFilter the filter for this transliterator.
|
||||
* @draft ICU 2.0
|
||||
*/
|
||||
NameUnicodeTransliterator(UChar32 openDelimiter, UChar32 closeDelimiter,
|
||||
UnicodeFilter* adoptedFilter = 0);
|
||||
|
||||
/**
|
||||
* Constructs a transliterator that recognizes the Perl-style
|
||||
* \N{name} notation.
|
||||
* @param adoptedFilter the filter for this transliterator.
|
||||
* @draft ICU 2.0
|
||||
*/
|
||||
|
|
|
@ -12,6 +12,8 @@
|
|||
#include "unicode/uchar.h"
|
||||
#include "uni2name.h"
|
||||
#include "cstring.h"
|
||||
#include "cmemory.h"
|
||||
#include "uprops.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -19,25 +21,15 @@ const char UnicodeNameTransliterator::fgClassID=0;
|
|||
|
||||
const char UnicodeNameTransliterator::_ID[] = "Any-Name";
|
||||
|
||||
static const UChar OPEN_DELIM[] = {92,78,123,0}; // "\N{"
|
||||
static const UChar CLOSE_DELIM = 125; // "}"
|
||||
#define OPEN_DELIM_LEN 3
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
*/
|
||||
UnicodeNameTransliterator::UnicodeNameTransliterator(
|
||||
UChar32 openDelim, UChar32 closeDelim,
|
||||
UnicodeFilter* adoptedFilter) :
|
||||
Transliterator(_ID, adoptedFilter),
|
||||
openDelimiter(openDelim),
|
||||
closeDelimiter(closeDelim) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a transliterator that generates the Perl-style
|
||||
* \N{name} notation.
|
||||
*/
|
||||
UnicodeNameTransliterator::UnicodeNameTransliterator(UnicodeFilter* adoptedFilter) :
|
||||
Transliterator(_ID, adoptedFilter),
|
||||
openDelimiter((UChar) 0x007B /*{*/),
|
||||
closeDelimiter((UChar) 0x007D /*}*/) {
|
||||
Transliterator(_ID, adoptedFilter) {
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -49,9 +41,7 @@ UnicodeNameTransliterator::~UnicodeNameTransliterator() {}
|
|||
* Copy constructor.
|
||||
*/
|
||||
UnicodeNameTransliterator::UnicodeNameTransliterator(const UnicodeNameTransliterator& o) :
|
||||
Transliterator(o),
|
||||
openDelimiter(o.openDelimiter),
|
||||
closeDelimiter(o.closeDelimiter) {}
|
||||
Transliterator(o) {}
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
|
@ -59,8 +49,6 @@ UnicodeNameTransliterator::UnicodeNameTransliterator(const UnicodeNameTransliter
|
|||
UnicodeNameTransliterator& UnicodeNameTransliterator::operator=(
|
||||
const UnicodeNameTransliterator& o) {
|
||||
Transliterator::operator=(o);
|
||||
openDelimiter = o.openDelimiter;
|
||||
closeDelimiter = o.closeDelimiter;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -78,15 +66,27 @@ Transliterator* UnicodeNameTransliterator::clone(void) const {
|
|||
*/
|
||||
void UnicodeNameTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
|
||||
UBool /*isIncremental*/) const {
|
||||
// As of Unicode 3.0.0, the longest name is 83 characters long.
|
||||
// Adjust this buffer size as needed.
|
||||
// The failure mode, here and below, is to behave like Any-Null,
|
||||
// if either there is no name data (max len == 0) or there is no
|
||||
// memory (malloc() => NULL).
|
||||
|
||||
char buf[128];
|
||||
int32_t maxLen = uprv_getMaxCharNameLength();
|
||||
if (maxLen == 0) {
|
||||
offsets.start = offsets.limit;
|
||||
return;
|
||||
}
|
||||
|
||||
// Accommodate the longest possible name plus padding
|
||||
char* buf = (char*) uprv_malloc(maxLen);
|
||||
if (buf == NULL) {
|
||||
offsets.start = offsets.limit;
|
||||
return;
|
||||
}
|
||||
|
||||
int32_t cursor = offsets.start;
|
||||
int32_t limit = offsets.limit;
|
||||
|
||||
UnicodeString str(openDelimiter);
|
||||
UnicodeString str(FALSE, OPEN_DELIM, OPEN_DELIM_LEN);
|
||||
UErrorCode status;
|
||||
int32_t len;
|
||||
|
||||
|
@ -94,11 +94,11 @@ void UnicodeNameTransliterator::handleTransliterate(Replaceable& text, UTransPos
|
|||
UChar32 c = text.char32At(cursor);
|
||||
int32_t clen = UTF_CHAR_LENGTH(c);
|
||||
status = U_ZERO_ERROR;
|
||||
if ((len = u_charName(c, U_EXTENDED_CHAR_NAME, buf, sizeof(buf), &status)) >0 && !U_FAILURE(status)) {
|
||||
str.truncate(1);
|
||||
str.append(UnicodeString(buf, len, "")).append(closeDelimiter);
|
||||
if ((len = u_charName(c, U_EXTENDED_CHAR_NAME, buf, maxLen, &status)) >0 && !U_FAILURE(status)) {
|
||||
str.truncate(OPEN_DELIM_LEN);
|
||||
str.append(UnicodeString(buf, len, "")).append(CLOSE_DELIM);
|
||||
text.handleReplaceBetween(cursor, cursor+clen, str);
|
||||
len += 2; // adjust for delimiters
|
||||
len += OPEN_DELIM_LEN + 1; // adjust for delimiters
|
||||
cursor += len; // advance cursor and adjust for new text
|
||||
limit += len-clen; // change in length
|
||||
} else {
|
||||
|
@ -109,6 +109,8 @@ void UnicodeNameTransliterator::handleTransliterate(Replaceable& text, UTransPos
|
|||
offsets.contextLimit += limit - offsets.limit;
|
||||
offsets.limit = limit;
|
||||
offsets.start = cursor;
|
||||
|
||||
uprv_free(buf);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -16,27 +16,15 @@ U_NAMESPACE_BEGIN
|
|||
|
||||
/**
|
||||
* A transliterator that performs character to name mapping.
|
||||
* It generates the Perl syntax \N{name}.
|
||||
* @author Alan Liu
|
||||
*/
|
||||
class U_I18N_API UnicodeNameTransliterator : public Transliterator {
|
||||
|
||||
UChar32 openDelimiter;
|
||||
UChar32 closeDelimiter;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Constructs a transliterator.
|
||||
* @param openDelimiter the open delimiter character.
|
||||
* @param closeDelimiter the close delimiter character.
|
||||
* @param adoptedFilter the filter to be adopted.
|
||||
*/
|
||||
UnicodeNameTransliterator(UChar32 openDelimiter, UChar32 closeDelimiter,
|
||||
UnicodeFilter* adoptedFilter = 0);
|
||||
|
||||
/**
|
||||
* Constructs a transliterator that generates the Perl-style
|
||||
* \N{name} notation.
|
||||
* @param adoptedFilter the filter to be adopted.
|
||||
*/
|
||||
UnicodeNameTransliterator(UnicodeFilter* adoptedFilter = 0);
|
||||
|
|
|
@ -1180,13 +1180,28 @@ void TransliteratorTest::TestNameMap(void) {
|
|||
return;
|
||||
}
|
||||
|
||||
// Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
|
||||
expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
|
||||
CharsToUnicodeString("{NO-BREAK SPACE}abc{CJK UNIFIED IDEOGRAPH-4E01}{MICRO SIGN}{GUJARATI SIGN CANDRABINDU}{REPLACEMENT CHARACTER}{END OF TRANSMISSION}{CHARACTER TABULATION}{<control-0081>}{<noncharacter-FFFF>}"));
|
||||
expect(*name2uni, "{ NO-BREAK SPACE}abc{ CJK UNIFIED IDEOGRAPH-4E01 }{x{MICRO SIGN}{GUJARATI SIGN CANDRABINDU}{REPLACEMENT CHARACTER}{END OF TRANSMISSION}{CHARACTER TABULATION}{<control-0081>}{<noncharacter-FFFF>}{<control-0004>}{",
|
||||
CharsToUnicodeString("\\u00A0abc\\u4E01{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004{"));
|
||||
CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
|
||||
expect(*name2uni, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
|
||||
CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
|
||||
|
||||
delete uni2name;
|
||||
delete name2uni;
|
||||
|
||||
// round trip
|
||||
Transliterator* t =
|
||||
Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
|
||||
if (t==0) {
|
||||
errln("FAIL: createInstance returned NULL");
|
||||
delete t;
|
||||
return;
|
||||
}
|
||||
|
||||
// Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
|
||||
UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
|
||||
expect(*t, s, s);
|
||||
delete t;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1678,7 +1693,7 @@ void TransliteratorTest::TestSupplemental() {
|
|||
|
||||
expectT("Any-Name",
|
||||
CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
|
||||
"{GOTHIC LETTER AHSA}{TAG LATIN SMALL LETTER A}{NO-BREAK SPACE}");
|
||||
"\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
|
||||
|
||||
expectT("Any-Hex/Unicode",
|
||||
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
|
||||
|
@ -3541,12 +3556,13 @@ void TransliteratorTest::TestUserFunction() {
|
|||
_TUFReg("Any-gif", t, 0);
|
||||
|
||||
t = Transliterator::createFromRules("RemoveCurly",
|
||||
"[\\{\\}] > ;",
|
||||
"[\\{\\}] > ; '\\N' > ;",
|
||||
UTRANS_FORWARD, pe, ec);
|
||||
if (t == NULL || U_FAILURE(ec)) {
|
||||
errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
|
||||
goto FAIL;
|
||||
}
|
||||
expect(*t, "\\N{name}", "name");
|
||||
_TUFReg("Any-RemoveCurly", t, 1);
|
||||
|
||||
logln("Trying &hex");
|
||||
|
@ -3588,7 +3604,7 @@ void TransliteratorTest::TestUserFunction() {
|
|||
|
||||
// Test that filters are allowed after &
|
||||
t = Transliterator::createFromRules("test",
|
||||
"(.) > &Hex($1) ' ' &[\\{\\}]Remove(&Name($1)) ' ';",
|
||||
"(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
|
||||
UTRANS_FORWARD, pe, ec);
|
||||
if (t == NULL || U_FAILURE(ec)) {
|
||||
errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
|
||||
|
|
Loading…
Add table
Reference in a new issue