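ICU-1681 Use a single U_EXTENDED_CHAR_NAME lookup in both NameUnicodeTransliterator and UnicodeNameTransliterator, replacing the separate U_UNICODE_CHAR_NAME / U_UNICODE_10_CHAR_NAME lookups and the hand-written U+XXXX parsing and formatting.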

X-SVN-Rev: 7658
This commit is contained in:
Yves Arrouye 2002-02-14 05:45:39 +00:00
parent 348a0c3880
commit 327f965b6b
2 changed files with 20 additions and 50 deletions

View file

@ -110,7 +110,7 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos
break;
case 1: // after open delimiter
// Look for [-a-zA-Z0-9+]. If \w+ is found, convert it
// Look for [-a-zA-Z0-9<>]. If \w+ is found, convert it
// to a single space. If closeDelimiter is found, exit
// the loop. If any other character is found, exit the
// loop. If the limit is found, exit the loop.
@ -134,35 +134,11 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos
buf[ibuf] = 0; // Add terminating zero
UErrorCode status = U_ZERO_ERROR;
UChar32 ch = UCHAR_MAX_VALUE + 1;
UChar32 ch;
// Try in this order: U+XXXX (and bail out if we cannot
// decode it), or Unicode name then Unicode 1.0 name.
if (ibuf >= 6 && buf[0] == 0x0055 && buf[1] == 0x002B) {
// We've found a U+ prefix, compute the value.
ch = 0;
int32_t jbuf = 2;
for (; jbuf < ibuf; ++jbuf) {
if (buf[jbuf] >= 0x0030 && buf[jbuf] <= 0x0039) {
ch = (ch << 4) + buf[jbuf] - 0x0030;
} else if (buf[jbuf] >= 0x0041 && buf[jbuf] <= 0x0046) {
ch = (ch << 4) + buf[jbuf] - 0x0041 + 10;
} else {
ch = UCHAR_MAX_VALUE + 1;
break;
}
}
} else {
u_UCharsToChars(buf, cbuf, ibuf+1);
ch = u_charFromName(U_UNICODE_CHAR_NAME, cbuf, &status);
if (ch == (UChar32) 0xFFFF || U_FAILURE(status)) {
status = U_ZERO_ERROR;
ch = u_charFromName(U_UNICODE_10_CHAR_NAME, cbuf, &status);
}
if (ch == (UChar32) 0xFFFF) ch = UCHAR_MAX_VALUE + 1;
}
if (ch != (UChar32) (UCHAR_MAX_VALUE + 1) && U_SUCCESS(status)) {
u_UCharsToChars(buf, cbuf, ibuf+1);
ch = u_charFromName(U_EXTENDED_CHAR_NAME, cbuf, &status);
if (U_SUCCESS(status)) {
// Lookup succeeded
str.truncate(0);
str.append(ch);
@ -182,15 +158,13 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos
continue;
}
//if (c >= (UChar)0x0061 && c <= (UChar)0x007A) {
// c -= 0x0020; // [a-z] => [A-Z]
//}
// Check if c =~ [-A-Z0-9+]
// Check if c =~ [-A-Za-z0-9<> ]
if (c == (UChar)0x002D ||
(c >= (UChar)0x0041 && c <= (UChar)0x005A) ||
(c >= (UChar)0x0061 && c <= (UChar)0x007A) ||
(c >= (UChar)0x0030 && c <= (UChar)0x0039) ||
c == (UChar)0x002B) {
c == (UChar)0x003C || c == (UChar)0x003E ||
c == 0x0020) {
buf[ibuf++] = (char) c;
// If we go a bit past the longest possible name then abort
if (ibuf == (LONGEST_NAME + 4)) {

View file

@ -92,22 +92,18 @@ void UnicodeNameTransliterator::handleTransliterate(Replaceable& text, UTransPos
while (cursor < limit) {
UChar32 c = text.char32At(cursor);
status = U_ZERO_ERROR;
if ((len = u_charName(c, U_UNICODE_CHAR_NAME, buf, sizeof(buf), &status)) <= 0 || U_FAILURE(status)) {
status = U_ZERO_ERROR;
if ((len = u_charName(c, U_UNICODE_10_CHAR_NAME, buf, sizeof(buf), &status)) <= 0 || U_FAILURE(status)) {
sprintf(buf, "U+%04lX", c);
len = uprv_strlen(buf);
}
}
str.truncate(1);
str.append(UnicodeString(buf, len, "")).append(closeDelimiter);
int32_t clen = UTF_CHAR_LENGTH(c);
text.handleReplaceBetween(cursor, cursor+clen, str);
len += 2; // adjust for delimiters
cursor += len; // advance cursor and adjust for new text
limit += len-clen; // change in length
status = U_ZERO_ERROR;
if ((len = u_charName(c, U_EXTENDED_CHAR_NAME, buf, sizeof(buf), &status)) >0 && !U_FAILURE(status)) {
str.truncate(1);
str.append(UnicodeString(buf, len, "")).append(closeDelimiter);
text.handleReplaceBetween(cursor, cursor+clen, str);
len += 2; // adjust for delimiters
cursor += len; // advance cursor and adjust for new text
limit += len-clen; // change in length
} else {
cursor += clen;
}
}
offsets.contextLimit += limit - offsets.limit;