From 327f965b6bbe889429af950197d29bc2ef84f065 Mon Sep 17 00:00:00 2001 From: Yves Arrouye Date: Thu, 14 Feb 2002 05:45:39 +0000 Subject: [PATCH] ICU-1681 simply use U_EXTENDED_CHAR_NAME to do the transliteration. X-SVN-Rev: 7658 --- icu4c/source/i18n/name2uni.cpp | 44 +++++++--------------------------- icu4c/source/i18n/uni2name.cpp | 26 +++++++++----------- 2 files changed, 20 insertions(+), 50 deletions(-) diff --git a/icu4c/source/i18n/name2uni.cpp b/icu4c/source/i18n/name2uni.cpp index 8d0109c9747..a005271d8b8 100644 --- a/icu4c/source/i18n/name2uni.cpp +++ b/icu4c/source/i18n/name2uni.cpp @@ -110,7 +110,7 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos break; case 1: // after open delimiter - // Look for [-a-zA-Z0-9+]. If \w+ is found, convert it + // Look for [-a-zA-Z0-9<>]. If \w+ is found, convert it // to a single space. If closeDelimiter is found, exit // the loop. If any other character is found, exit the // loop. If the limit is found, exit the loop. @@ -134,35 +134,11 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos buf[ibuf] = 0; // Add terminating zero UErrorCode status = U_ZERO_ERROR; - UChar32 ch = UCHAR_MAX_VALUE + 1; + UChar32 ch; - // Try in this order: U+XXXX (and bail out if we cannot - // decode it), or Unicode name then Unicode 1.0 name. - - if (ibuf >= 6 && buf[0] == 0x0055 && buf[1] == 0x002B) { - // We've found a U+ prefix, compute the value. - ch = 0; - int32_t jbuf = 2; - for (; jbuf < ibuf; ++jbuf) { - if (buf[jbuf] >= 0x0030 && buf[jbuf] <= 0x0039) { - ch = (ch << 4) + buf[jbuf] - 0x0030; - } else if (buf[jbuf] >= 0x0041 && buf[jbuf] <= 0x0046) { - ch = (ch << 4) + buf[jbuf] - 0x0041 + 10; - } else { - ch = UCHAR_MAX_VALUE + 1; - break; - } - } - } else { - u_UCharsToChars(buf, cbuf, ibuf+1); - ch = u_charFromName(U_UNICODE_CHAR_NAME, cbuf, &status); - if (ch == (UChar32) 0xFFFF || U_FAILURE(status)) { - status = U_ZERO_ERROR; - ch = u_charFromName(U_UNICODE_10_CHAR_NAME, cbuf, &status); - } - if (ch == (UChar32) 0xFFFF) ch = UCHAR_MAX_VALUE + 1; - } - if (ch != (UChar32) (UCHAR_MAX_VALUE + 1) && U_SUCCESS(status)) { + u_UCharsToChars(buf, cbuf, ibuf+1); + ch = u_charFromName(U_EXTENDED_CHAR_NAME, cbuf, &status); + if (U_SUCCESS(status)) { // Lookup succeeded str.truncate(0); str.append(ch); @@ -182,15 +158,13 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos continue; } - //if (c >= (UChar)0x0061 && c <= (UChar)0x007A) { - // c -= 0x0020; // [a-z] => [A-Z] - //} - - // Check if c =~ [-A-Z0-9+] + // Check if c =~ [-A-Za-z0-9<> ] if (c == (UChar)0x002D || (c >= (UChar)0x0041 && c <= (UChar)0x005A) || + (c >= (UChar)0x0061 && c <= (UChar)0x007A) || (c >= (UChar)0x0030 && c <= (UChar)0x0039) || - c == (UChar)0x002B) { + c == (UChar)0x003C || c == (UChar)0x003E || + c == 0x0020) { buf[ibuf++] = (char) c; // If we go a bit past the longest possible name then abort if (ibuf == (LONGEST_NAME + 4)) { diff --git a/icu4c/source/i18n/uni2name.cpp b/icu4c/source/i18n/uni2name.cpp index e8e83094e44..81cafc2b082 100644 --- a/icu4c/source/i18n/uni2name.cpp +++ b/icu4c/source/i18n/uni2name.cpp @@ -92,22 +92,18 @@ void UnicodeNameTransliterator::handleTransliterate(Replaceable& text, UTransPos while (cursor < limit) { UChar32 c = text.char32At(cursor); - status = U_ZERO_ERROR; - if ((len = u_charName(c, U_UNICODE_CHAR_NAME, buf, sizeof(buf), &status)) <= 0 || U_FAILURE(status)) { - status = U_ZERO_ERROR; - if ((len = u_charName(c, U_UNICODE_10_CHAR_NAME, buf, sizeof(buf), &status)) <= 0 || U_FAILURE(status)) { - sprintf(buf, "U+%04lX", c); - len = uprv_strlen(buf); - } - } - - str.truncate(1); - str.append(UnicodeString(buf, len, "")).append(closeDelimiter); int32_t clen = UTF_CHAR_LENGTH(c); - text.handleReplaceBetween(cursor, cursor+clen, str); - len += 2; // adjust for delimiters - cursor += len; // advance cursor and adjust for new text - limit += len-clen; // change in length + status = U_ZERO_ERROR; + if ((len = u_charName(c, U_EXTENDED_CHAR_NAME, buf, sizeof(buf), &status)) >0 && !U_FAILURE(status)) { + str.truncate(1); + str.append(UnicodeString(buf, len, "")).append(closeDelimiter); + text.handleReplaceBetween(cursor, cursor+clen, str); + len += 2; // adjust for delimiters + cursor += len; // advance cursor and adjust for new text + limit += len-clen; // change in length + } else { + cursor += clen; + } } offsets.contextLimit += limit - offsets.limit;