diff --git a/icu4c/source/i18n/name2uni.cpp b/icu4c/source/i18n/name2uni.cpp new file mode 100644 index 00000000000..6490229dff4 --- /dev/null +++ b/icu4c/source/i18n/name2uni.cpp @@ -0,0 +1,183 @@ +/* +********************************************************************** +* Copyright (C) 2001, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 06/07/01 aliu Creation. +********************************************************************** +*/ + +#include "unicode/name2uni.h" +#include "unicode/unifilt.h" +#include "unicode/unicode.h" + +const char* NameUnicodeTransliterator::_ID = "Name-Any"; + +// As of Unicode 3.0.0, the longest name is 83 characters long. +#define LONGEST_NAME 83 + +/** + * Constructs a transliterator. + */ +NameUnicodeTransliterator::NameUnicodeTransliterator( + UChar32 openDelim, UChar32 closeDelim, + UnicodeFilter* adoptedFilter) : + Transliterator(_ID, adoptedFilter), + openDelimiter(openDelim), + closeDelimiter(closeDelim) { +} + +/** + * Constructs a transliterator with the default delimiters '{' and + * '}'. + */ +NameUnicodeTransliterator::NameUnicodeTransliterator(UnicodeFilter* adoptedFilter) : + Transliterator(_ID, adoptedFilter), + openDelimiter((UChar) 0x007B /*{*/), + closeDelimiter((UChar) 0x007D /*}*/) { +} + +/** + * Destructor. + */ +NameUnicodeTransliterator::~NameUnicodeTransliterator() {} + +/** + * Copy constructor. + */ +NameUnicodeTransliterator::NameUnicodeTransliterator(const NameUnicodeTransliterator& o) : + Transliterator(o), + openDelimiter(o.openDelimiter), + closeDelimiter(o.closeDelimiter) {} + +/** + * Assignment operator. + */ +NameUnicodeTransliterator& NameUnicodeTransliterator::operator=( + const NameUnicodeTransliterator& o) { + Transliterator::operator=(o); + openDelimiter = o.openDelimiter; + closeDelimiter = o.closeDelimiter; + return *this; +} + +/** + * Transliterator API. + */ +Transliterator* NameUnicodeTransliterator::clone(void) const { + return new NameUnicodeTransliterator(*this); +} + +/** + * Implements {@link Transliterator#handleTransliterate}. + */ +void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets, + UBool isIncremental) const { + // Accomodate the longest possible name plus padding + char buf[LONGEST_NAME + 8]; + + // The only characters used in names are (as of Unicode 3.0.0): + // -0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ + // (first character is a space). + + int32_t cursor = offsets.start; + int32_t limit = offsets.limit; + + // Modes: + // 0 - looking for open delimiter + // 1 - after open delimiter + int32_t mode = 0; + int32_t ibuf = 0; + int32_t openPos = offsets.start; // position of openDelimiter + + UnicodeString str; + + for (; cursor < limit; ++cursor) { + UChar c = filteredCharAt(text, cursor); + + switch (mode) { + case 0: // looking for open delimiter + if (c == openDelimiter) { + openPos = cursor; + mode = 1; + ibuf = 0; + } + break; + + case 1: // after open delimiter + // Look for [-a-zA-Z0-9]. If \w+ is found, convert it + // to a single space. If closeDelimiter is found, exit + // the loop. If any other character is found, exit the + // loop. If the limit is found, exit the loop. + if (Unicode::isWhitespace(c)) { + // Ignore leading whitespace + if (ibuf != 0 && buf[ibuf-1] != (UChar)0x0020) { + buf[ibuf++] = (UChar)0x0020 /* */; + // If we go a bit past the longest possible name then abort + if (ibuf == (LONGEST_NAME + 4)) { + mode = 0; + } + } + continue; + } + + if (c == closeDelimiter) { + // Delete trailing space, if any + if (ibuf > 0 && buf[ibuf-1] == (UChar)0x0020) { + --ibuf; + } + buf[ibuf] = 0; // Add terminating zero + UErrorCode status = U_ZERO_ERROR; + UChar32 ch = u_charFromName(U_UNICODE_CHAR_NAME, buf, &status); + if (ch != (UChar32) 0xFFFF && U_SUCCESS(status)) { + // Lookup succeeded + str.truncate(0); + str.append(ch); + text.handleReplaceBetween(openPos, cursor+1, str); + + // Adjust indices for the change in the length of + // the string. Do not assume that str.length() == + // 1, in case of surrogates. + int32_t delta = cursor + 1 - openPos - str.length(); + cursor -= delta; + limit -= delta; + // assert(cursor == openPos + str.length()); + } + // If the lookup failed, we leave things as-is and + // still switch to mode 0 and continue. + mode = 0; + continue; + } + + if (c >= (UChar)0x0061 && c <= (UChar)0x007A) { + c -= 0x0020; // [a-z] => [A-Z] + } + + // Check if c =~ [-A-Z0-9] + if (c == (UChar)0x002D || + (c >= (UChar)0x0041 && c <= (UChar)0x005A) || + (c >= (UChar)0x0030 && c <= (UChar)0x0039)) { + buf[ibuf++] = (char) c; + // If we go a bit past the longest possible name then abort + if (ibuf == (LONGEST_NAME + 4)) { + mode = 0; + } + } + + // Invalid character + else { + --cursor; // Backup and reprocess this character + mode = 0; + } + + break; + } + } + + offsets.contextLimit += limit - offsets.limit; + offsets.limit = limit; + // In incremental mode, only advance the cursor up to the last + // open delimiter, if we are in mode 1. + offsets.start = (mode == 1 && isIncremental) ? openPos : cursor; +} diff --git a/icu4c/source/i18n/uni2name.cpp b/icu4c/source/i18n/uni2name.cpp new file mode 100644 index 00000000000..047670bf2fb --- /dev/null +++ b/icu4c/source/i18n/uni2name.cpp @@ -0,0 +1,108 @@ +/* +********************************************************************** +* Copyright (C) 2001, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 06/06/01 aliu Creation. +********************************************************************** +*/ + +#include "unicode/uni2name.h" +#include "unicode/unifilt.h" + +const char* UnicodeNameTransliterator::_ID = "Any-Name"; + +/** + * Constructs a transliterator. + */ +UnicodeNameTransliterator::UnicodeNameTransliterator( + UChar32 openDelim, UChar32 closeDelim, + UnicodeFilter* adoptedFilter) : + Transliterator(_ID, adoptedFilter), + openDelimiter(openDelim), + closeDelimiter(closeDelim) { +} + +/** + * Constructs a transliterator with the default delimiters '{' and + * '}'. + */ +UnicodeNameTransliterator::UnicodeNameTransliterator(UnicodeFilter* adoptedFilter) : + Transliterator(_ID, adoptedFilter), + openDelimiter((UChar) 0x007B /*{*/), + closeDelimiter((UChar) 0x007D /*}*/) { +} + +/** + * Destructor. + */ +UnicodeNameTransliterator::~UnicodeNameTransliterator() {} + +/** + * Copy constructor. + */ +UnicodeNameTransliterator::UnicodeNameTransliterator(const UnicodeNameTransliterator& o) : + Transliterator(o), + openDelimiter(o.openDelimiter), + closeDelimiter(o.closeDelimiter) {} + +/** + * Assignment operator. + */ +UnicodeNameTransliterator& UnicodeNameTransliterator::operator=( + const UnicodeNameTransliterator& o) { + Transliterator::operator=(o); + openDelimiter = o.openDelimiter; + closeDelimiter = o.closeDelimiter; + return *this; +} + +/** + * Transliterator API. + */ +Transliterator* UnicodeNameTransliterator::clone(void) const { + return new UnicodeNameTransliterator(*this); +} + +/** + * Implements {@link Transliterator#handleTransliterate}. + */ +void UnicodeNameTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets, + UBool isIncremental) const { + // As of Unicode 3.0.0, the longest name is 83 characters long. + // Adjust this buffer size as needed. + char buf[128]; + + int32_t cursor = offsets.start; + int32_t limit = offsets.limit; + + const UnicodeFilter* filt = getFilter(); + UnicodeString str(openDelimiter); + UErrorCode status; + UTextOffset len; + + while (cursor < limit) { + status = U_ZERO_ERROR; + UChar c = text.charAt(cursor); + if ((filt == 0 || filt->contains(c)) && + (len=u_charName(c, U_UNICODE_CHAR_NAME, buf, sizeof(buf), &status)) > 0 && + !U_FAILURE(status)) { + + str.truncate(1); + str.append(UnicodeString(buf, len, "")).append(closeDelimiter); + + text.handleReplaceBetween(cursor, cursor+1, str); + len += 2; // adjust for delimiters + cursor += len; // advance cursor by 1 and adjust for new text + limit += len-1; // change in length is (len - 1) + } else { + ++cursor; + continue; + } + } + + offsets.contextLimit += limit - offsets.limit; + offsets.limit = limit; + offsets.start = cursor; +} diff --git a/icu4c/source/i18n/unicode/name2uni.h b/icu4c/source/i18n/unicode/name2uni.h new file mode 100644 index 00000000000..f6678d7fa10 --- /dev/null +++ b/icu4c/source/i18n/unicode/name2uni.h @@ -0,0 +1,71 @@ +/* +********************************************************************** +* Copyright (C) 2001, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 06/07/01 aliu Creation. +********************************************************************** +*/ +#ifndef NAME2UNI_H +#define NAME2UNI_H + +#include "unicode/translit.h" + +/** + * A transliterator that performs name to character mapping. + * @author Alan Liu + */ +class U_I18N_API NameUnicodeTransliterator : public Transliterator { + + UChar32 openDelimiter; + UChar32 closeDelimiter; + + public: + + /** + * Constructs a transliterator. + */ + NameUnicodeTransliterator(UChar32 openDelimiter, UChar32 closeDelimiter, + UnicodeFilter* adoptedFilter = 0); + + /** + * Constructs a transliterator with the default delimiters '{' and + * '}'. + */ + NameUnicodeTransliterator(UnicodeFilter* adoptedFilter = 0); + + /** + * Destructor. + */ + virtual ~NameUnicodeTransliterator(); + + /** + * Copy constructor. + */ + NameUnicodeTransliterator(const NameUnicodeTransliterator&); + + /** + * Assignment operator. + */ + NameUnicodeTransliterator& operator=(const NameUnicodeTransliterator&); + + /** + * Transliterator API. + */ + Transliterator* clone(void) const; + + protected: + + /** + * Implements {@link Transliterator#handleTransliterate}. + * @draft + */ + virtual void handleTransliterate(Replaceable& text, UTransPosition& offset, + UBool isIncremental) const; + + private: + + static const char* _ID; +}; +#endif diff --git a/icu4c/source/i18n/unicode/uni2name.h b/icu4c/source/i18n/unicode/uni2name.h new file mode 100644 index 00000000000..f4c31bb41d2 --- /dev/null +++ b/icu4c/source/i18n/unicode/uni2name.h @@ -0,0 +1,71 @@ +/* +********************************************************************** +* Copyright (C) 2001, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 06/06/01 aliu Creation. +********************************************************************** +*/ +#ifndef UNI2NAME_H +#define UNI2NAME_H + +#include "unicode/translit.h" + +/** + * A transliterator that performs character to name mapping. + * @author Alan Liu + */ +class U_I18N_API UnicodeNameTransliterator : public Transliterator { + + UChar32 openDelimiter; + UChar32 closeDelimiter; + + public: + + /** + * Constructs a transliterator. + */ + UnicodeNameTransliterator(UChar32 openDelimiter, UChar32 closeDelimiter, + UnicodeFilter* adoptedFilter = 0); + + /** + * Constructs a transliterator with the default delimiters '{' and + * '}'. + */ + UnicodeNameTransliterator(UnicodeFilter* adoptedFilter = 0); + + /** + * Destructor. + */ + virtual ~UnicodeNameTransliterator(); + + /** + * Copy constructor. + */ + UnicodeNameTransliterator(const UnicodeNameTransliterator&); + + /** + * Assignment operator. + */ + UnicodeNameTransliterator& operator=(const UnicodeNameTransliterator&); + + /** + * Transliterator API. + */ + Transliterator* clone(void) const; + + protected: + + /** + * Implements {@link Transliterator#handleTransliterate}. + * @draft + */ + virtual void handleTransliterate(Replaceable& text, UTransPosition& offset, + UBool isIncremental) const; + + private: + + static const char* _ID; +}; +#endif