From 51632583391d675f9fd4d33092d9544e8d360ea2 Mon Sep 17 00:00:00 2001
From: Syn Wee Quek <swquek@svn.icu-project.org>
Date: Thu, 19 Sep 2002 21:19:04 +0000
Subject: [PATCH] ICU-2285 * clean up code * added iso comments * added get max
 names length * added get names set * shifted UCharacterName and related code
 to impl

X-SVN-Rev: 9892
---
 .../src/com/ibm/icu/impl/UCharacterName.java  | 1710 +++++++++++++++++
 .../{lang => impl}/UCharacterNameChoice.java  |   17 +-
 .../{lang => impl}/UCharacterNameReader.java  |    8 +-
 .../UCharacterUtility.java}                   |   67 +-
 icu4j/src/com/ibm/icu/lang/UCharacter.java    |   81 +-
 .../com/ibm/icu/lang/UCharacterCategory.java  |   63 +-
 .../src/com/ibm/icu/lang/UCharacterName.java  | 1181 ------------
 .../ibm/icu/lang/UCharacterNameIterator.java  |   14 +-
 8 files changed, 1816 insertions(+), 1325 deletions(-)
 create mode 100644 icu4j/src/com/ibm/icu/impl/UCharacterName.java
 rename icu4j/src/com/ibm/icu/{lang => impl}/UCharacterNameChoice.java (73%)
 mode change 100755 => 100644
 rename icu4j/src/com/ibm/icu/{lang => impl}/UCharacterNameReader.java (97%)
 mode change 100755 => 100644
 rename icu4j/src/com/ibm/icu/{lang/UCharacterUtil.java => impl/UCharacterUtility.java} (72%)
 mode change 100755 => 100644
 delete mode 100755 icu4j/src/com/ibm/icu/lang/UCharacterName.java
diff --git a/icu4j/src/com/ibm/icu/impl/UCharacterName.java b/icu4j/src/com/ibm/icu/impl/UCharacterName.java
new file mode 100644
index 00000000000..c019adb1b94
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/impl/UCharacterName.java
@@ -0,0 +1,1710 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2001, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+* $Source: 
+*     /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $ 
+* $Date: 2002/09/19 21:19:04 $ 
+* $Revision: 1.1 $
+*
+*******************************************************************************
+*/
+package com.ibm.icu.impl;
+
+import java.io.InputStream;
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import com.ibm.icu.impl.Utility;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.lang.UCharacterCategory;
+
+/**
+* Internal class to manage character names.
+* Since data in <a href=UCharacterNameDB.html>UCharacterNameDB</a> is stored
+* in an array of char, by default the indexes used in this class refer to 
+* a 2 byte count, unless otherwise stated. In cases where the index refers 
+* to a byte count, the index is halved and, depending on whether the index is 
+* even or odd, the MSB or LSB of the char at the halved index is 
+* returned. For indexes into an array of int, the index is multiplied by 2 and
+* the char at the multiplied index and its following char are returned as an 
+* int.
+* <a href=UCharacter.html>UCharacter</a> acts as a public facade for this class
+* Note : 0 - 0x1F are control characters without names in Unicode 3.0
+* Information on parsing of the binary data is located at
+* <a href=oss.software.ibm.com/icu4j/icu4jhtml/com/ibm/icu/text/readme.html>
+* ReadMe</a>
+* @author Syn Wee Quek
+* @since nov0700
+*/
+
+public final class UCharacterName
+{   
+    // public data members ----------------------------------------------
+    
+    /**
+    * Number of lines per group 
+    * 1 << GROUP_SHIFT_
+    */
+    public static final int LINES_PER_GROUP_ = 1 << 5;
+    /**
+     * Maximum number of groups
+     */
+    public int m_groupcount_ = 0;
+    
+    // public methods ---------------------------------------------------
+    
+    /**
+     * Gets the only instance of UCharacterName, creating it on the first call.
+     * @return only instance of UCharacterName
+     * @exception RuntimeException carrying the message of any construction failure
+     */
+    public static UCharacterName getInstance() throws RuntimeException
+    {
+        if (INSTANCE_ == null) {
+            try {
+                INSTANCE_ = new UCharacterName();
+            }
+            catch (Exception e) {
+                throw new RuntimeException(e.getMessage());
+            }
+        }
+        return INSTANCE_;
+    }
+    
+    /**
+    * Retrieve the name of a Unicode code point.
+    * Depending on <code>choice</code>, the character name written into the 
+    * buffer is the "modern" name or the name that was defined in Unicode 
+    * version 1.0.
+    * The name contains only "invariant" characters
+    * like A-Z, 0-9, space, and '-'.
+    *
+    * @param ch the code point for which to get the name.
+    * @param choice Selector for which name to get.
+    * @return if code point is above 0x1fff, null is returned
+    */
+    public String getName(int ch, int choice)
+    {
+        if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE || 
+            choice > UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT) {
+            return null;
+        }
+        
+        String result = null;
+        
+        result = getAlgName(ch, choice);
+          
+        // getting normal character name
+        if (result == null || result.length() == 0) {
+            if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {      
+                result = getExtendedName(ch);   
+            } else {
+                result = getGroupName(ch, choice);
+            }
+        }
+          
+        return result;
+    }
+      
+    /**
+    * Finds a character by its name and returns its code point value.
+    * @param choice selector to indicate if argument name is a Unicode 1.0 
+    *        or the most current version 
+    * @param name character name, compared after locale-independent case mapping
+    * @return code point, -1 if the arguments are illegal
+    */
+    public int getCharFromName(int choice, String name)
+    {
+        // checks for illegal arguments
+        if (choice >= UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT || 
+            name == null || name.length() == 0) {
+            return -1;
+        }
+        
+        // try extended names first. Case mapping uses a fixed locale so that
+        // a default locale such as Turkish can not alter the ASCII letters
+        int result = getExtendedChar(
+                          name.toLowerCase(java.util.Locale.ENGLISH), choice);
+        if (result >= -1) {
+            return result;
+        }
+        
+        String upperCaseName = name.toUpperCase(java.util.Locale.ENGLISH);
+        // try algorithmic names next, if that fails then try group names
+        if (choice != UCharacterNameChoice.UNICODE_10_CHAR_NAME) {
+            int count = 0;
+            if (m_algorithm_ != null) {
+                count = m_algorithm_.length;
+            }
+            for (count --; count >= 0; count --) {
+                result = m_algorithm_[count].getChar(upperCaseName); 
+                if (result >= 0) {
+                    return result;
+                }
+            }
+        }
+            
+        if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
+            result = getGroupChar(upperCaseName, 
+                                  UCharacterNameChoice.UNICODE_CHAR_NAME);
+            if (result == -1) {
+                result = getGroupChar(upperCaseName, 
+                                  UCharacterNameChoice.UNICODE_10_CHAR_NAME);
+            }
+        }
+        else {
+            result = getGroupChar(upperCaseName, choice);
+        }
+        return result;
+    }
+    
+    // these are all UCharacterNameIterator use methods -------------------
+    
+    /**
+    * Reads a block of compressed lengths of 32 strings and expands them into 
+    * offsets and lengths for each string. Lengths are stored with a 
+    * variable-width encoding in consecutive nibbles:
+    * If a nibble<0xc, then it is the length itself (0 = empty string).
+    * If a nibble>=0xc, then it forms a length value with the following 
+    * nibble.
+    * The offsets and lengths arrays must be at least 33 (one more) long 
+    * because there is no check here at the end if the last nibble is still 
+    * used.
+    * @param index index of the group whose string lengths are to be read
+    * @param offsets array to store the value of the string offsets
+    * @param lengths array to store the value of the string length
+    * @return next index of the data string immediately after the lengths 
+    *         in terms of byte address
+    */
+    public int getGroupLengths(int index, char offsets[], char lengths[]) 
+    {
+        char length = 0xffff; // 0xffff means no first nibble is pending
+        byte b = 0,
+            n = 0;
+        int shift;
+        index = index * m_groupsize_; // position of the group in m_groupinfo_
+        int stringoffset = UCharacterUtility.toInt(
+                                 m_groupinfo_[index + OFFSET_HIGH_OFFSET_], 
+                                 m_groupinfo_[index + OFFSET_LOW_OFFSET_]);
+            
+        offsets[0] = 0;
+        
+        // all 32 lengths must be read to get the offset of the first group 
+        // string
+        for (int i = 0; i < LINES_PER_GROUP_; stringoffset ++) {
+            b = m_groupstring_[stringoffset];
+            shift = 4;
+              
+            while (shift >= 0) {
+                // getting nibble, the high nibble of the byte comes first
+                n = (byte)((b >> shift) & 0x0F);   
+                if (length == 0xffff && n > SINGLE_NIBBLE_MAX_) {
+                    length = (char)((n - 12) << 4);
+                }
+                else {
+                    if (length != 0xffff) {
+                       lengths[i] = (char)((length | n) + 12);
+                    }
+                    else {
+                       lengths[i] = (char)n;
+                    }
+                    
+                    if (i < LINES_PER_GROUP_) {
+                       offsets[i + 1] = (char)(offsets[i] + lengths[i]);
+                    }
+                    
+                    length = 0xffff;
+                    i ++;
+                }
+                      
+                shift -= 4;
+            }
+        }
+        return stringoffset;
+    }
+    
+    /**
+    * Expands the compressed name stored at index into a readable name.
+    * UnicodeData.txt uses ';' as a field separator, so no field can contain 
+    * ';' as part of its contents. In unames.icu, it is marked as 
+    * token[';'] == -1 only if the semicolon is used in the data file - which 
+    * is iff we have Unicode 1.0 names or ISO comments. 
+    * So, it will be token[';'] == -1 if we store U1.0 names/ISO comments 
+    * although we know that it will never be part of a name. 
+    * Equivalent to ICU4C's expandName.
+    * @param index of the group name string in byte count
+    * @param length of the group name string
+    * @param choice of Unicode 1.0 name or the most current name
+    * @return the expanded name, or null if the result is empty
+    */
+    public String getGroupName(int index, int length, int choice) 
+    {
+        if (choice == UCharacterNameChoice.UNICODE_10_CHAR_NAME
+            || choice == UCharacterNameChoice.ISO_COMMENT_) {
+            if (';' >= m_tokentable_.length || m_tokentable_[';'] == 0xFFFF) {
+                // skip the modern name
+                int oldindex = index;
+                index += UCharacterUtility.skipByteSubString(m_groupstring_, 
+                                                   index, length, (byte)';');   
+                length -= (index - oldindex);                 
+                if (choice == UCharacterNameChoice.ISO_COMMENT_) {
+                    // skips the 1.0 Name to the iso comment part
+                    oldindex = index;
+                    index += UCharacterUtility.skipByteSubString(m_groupstring_, 
+                                                    index, length, (byte)';');   
+                    length -= (index - oldindex);
+                }
+            }
+            else {
+                // the semicolon byte is a token number, therefore only modern
+                // names are stored in unames.dat and there is no such 
+                // requested Unicode 1.0 name here
+                length = 0;
+            }
+        }
+        
+        synchronized (m_utilStringBuffer_) {
+            m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
+            int b;  // current byte, widened as unsigned (0..255)
+            char token;
+            for (int i = 0; i < length;) {
+                // mask so that bytes >= 0x80 do not become negative indexes
+                b = m_groupstring_[index + i] & 0x00ff;
+                i ++;
+                if (b >= m_tokentable_.length) {
+                    if (b == ';') {
+                        break;
+                    }
+                    m_utilStringBuffer_.append((char)b); // implicit letter
+                }
+                else {
+                    token = m_tokentable_[b & 0x00ff];
+                    if (token == 0xFFFE) {
+                        // this is a lead byte for a double-byte token
+                        token = m_tokentable_[b << 8 | 
+                                          (m_groupstring_[index + i] & 0x00ff)];
+                        i ++;
+                    }
+                    if (token == 0xFFFF) {
+                        if (b == ';') {
+                            // skip the semicolon if we are seeking extended 
+                            // names and there was no 2.0 name but there
+                            // is a 1.0 name.
+                            if (m_utilStringBuffer_.length() == 0 && choice == 
+                                   UCharacterNameChoice.EXTENDED_CHAR_NAME) {
+                                continue;
+                            }
+                            break;
+                        }
+                        // explicit letter
+                        m_utilStringBuffer_.append((char)(b & 0x00ff)); 
+                    }
+                    else { // write token word
+                        UCharacterUtility.getNullTermByteSubString(
+                                m_utilStringBuffer_, m_tokenstring_, token);
+                    }
+                }
+            }
+    
+            if (m_utilStringBuffer_.length() > 0) {
+                return m_utilStringBuffer_.toString();
+            }
+        }
+        return null;
+    }
+    
+    /**
+    * Retrieves the extended name; falls back when there is no modern name
+    */
+    public String getExtendedName(int ch) 
+    {    
+        String modern = getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
+        if (modern != null) {
+            return modern;
+        }
+        if (getType(ch) == UCharacterCategory.CONTROL) {
+            String old = getName(ch, UCharacterNameChoice.UNICODE_10_CHAR_NAME);
+            if (old != null) {
+                return old;
+            }
+        }
+        return getExtendedOr10Name(ch);
+    }
+    
+    /**
+     * Finds the group for the codepoint, or failing that the group before it.
+     * @param codepoint code point to locate
+     * @return group index containing codepoint or the group before it.
+     */
+    public int getGroup(int codepoint)
+    {
+        int limit  = m_groupcount_;
+        int target = getCodepointMSB(codepoint);
+        int low    = 0;    
+        // binary search over [low, limit): low always ends up on the group 
+        // that contains codepoint or, failing that, on the highest group 
+        // that precedes it
+        while (low < limit - 1) {
+            int middle = (low + limit) >> 1;
+            if (getGroupMSB(middle) <= target) {
+                low = middle;
+            }
+            else {
+                limit = middle;
+            }
+        }
+        return low;
+    }
+    
+    /**
+     * Gets the extended or the 1.0 name when the most current unicode names
+     * fail. The extended form is "&lt;type-XXXX&gt;" with the code point in hex.
+     * @param ch codepoint
+     * @return name of codepoint extended or 1.0
+     */
+    public String getExtendedOr10Name(int ch)
+    {
+        // control characters may still carry a Unicode 1.0 name
+        if (getType(ch) == UCharacterCategory.CONTROL) {            
+            String oldName = getName(ch, 
+                             UCharacterNameChoice.UNICODE_10_CHAR_NAME);        
+            if (oldName != null) {
+                return oldName;
+            }
+        }        
+        // Otherwise build the bracketed form. The type name is unknown if 
+        // the table of type names is not up to date.
+        int type = getType(ch);    
+        String typeName;
+        if (type < TYPE_NAMES_.length) {       
+            typeName = TYPE_NAMES_[type];    
+        } 
+        else {        
+            typeName = UNKNOWN_TYPE_NAME_;    
+        }
+        String hex = Integer.toHexString(ch).toUpperCase();
+        synchronized (m_utilStringBuffer_) {
+            m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
+            m_utilStringBuffer_.append('<');
+            m_utilStringBuffer_.append(typeName);
+            m_utilStringBuffer_.append('-');
+            // left pad the code point to at least 4 hex digits
+            for (int digits = hex.length(); digits < 4; digits ++) {
+                m_utilStringBuffer_.append('0');
+            }
+            m_utilStringBuffer_.append(hex);
+            m_utilStringBuffer_.append('>');
+            return m_utilStringBuffer_.toString();
+        }
+    }
+    
+    /**
+     * Reads the MSB stored for a group
+     * @param gindex group index
+     * @return the MSB of the group, -1 if gindex is not below the group count
+     */
+    public int getGroupMSB(int gindex)
+    {
+        if (gindex < m_groupcount_) {
+            return m_groupinfo_[gindex * m_groupsize_];
+        }
+        return -1;
+    }
+    
+    /**
+     * Gets the group MSB of the codepoint, i.e. codepoint >> GROUP_SHIFT_
+     * @param codepoint code point to examine
+     * @return the codepoint with its low GROUP_SHIFT_ bits shifted out
+     */
+    public static int getCodepointMSB(int codepoint)
+    {
+        return codepoint >> GROUP_SHIFT_;
+    }
+    
+    /**
+     * Gets the maximum codepoint + 1 of the group
+     * @param msb group MSB, a codepoint shifted right by GROUP_SHIFT_
+     * @return limit codepoint, (msb << GROUP_SHIFT_) + LINES_PER_GROUP_
+     */
+    public static int getGroupLimit(int msb)
+    {
+        return (msb << GROUP_SHIFT_) + LINES_PER_GROUP_;
+    }
+    
+    /**
+     * Gets the minimum codepoint of the group
+     * @param msb group MSB, a codepoint shifted right by GROUP_SHIFT_
+     * @return minimum codepoint of the group, msb << GROUP_SHIFT_
+     */
+    public static int getGroupMin(int msb)
+    {
+        return msb << GROUP_SHIFT_;
+    }
+    
+    /**
+     * Gets the offset of a codepoint within its group
+     * @param codepoint code point to examine
+     * @return the bits of codepoint selected by GROUP_MASK_
+     */
+    public static int getGroupOffset(int codepoint)
+    {
+        return codepoint & GROUP_MASK_;
+    }
+
+
+    /**
+     * Gets the minimum codepoint of a group
+     * @param codepoint code point to examine
+     * @return codepoint with the bits of GROUP_MASK_ cleared
+     */
+    public static int getGroupMinFromCodepoint(int codepoint)
+    {
+        return codepoint & ~GROUP_MASK_;
+    
+    /**
+     * Get the Algorithm range length 
+     * @return number of algorithmic ranges, 0 if there is no range array
+     */
+    public int getAlgorithmLength()
+    {
+        return m_algorithm_ == null ? 0 : m_algorithm_.length;
+    }
+        
+    /**
+     * Gets the start of the range
+     * @param index algorithm index, not range checked here
+     * @return first code point of the algorithmic range at index
+     */
+    public int getAlgorithmStart(int index)
+    {
+        return m_algorithm_[index].m_rangestart_;
+    }
+        
+    /**
+     * Gets the end of the range
+     * @param index algorithm index, not range checked here
+     * @return last code point of the algorithmic range at index
+     */
+    public int getAlgorithmEnd(int index)
+    {
+        return m_algorithm_[index].m_rangeend_;
+    }
+    
+    /**
+     * Builds the algorithmic name of the codepoint
+     * @param index algorithmic range index
+     * @param codepoint code point to name, not checked against the range
+     * @return algorithmic name of codepoint
+     */
+    public String getAlgorithmName(int index, int codepoint) 
+    {
+        synchronized (m_utilStringBuffer_) {
+            // the shared buffer is emptied before every use
+            m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
+            AlgorithmName range = m_algorithm_[index];
+            range.appendName(codepoint, m_utilStringBuffer_);
+            return m_utilStringBuffer_.toString();
+        }
+    }
+    
+    // these are transliterator use methods ---------------------------------
+    
+    /**
+     * Gets the maximum length of any codepoint name.
+     * Equivalent to uprv_getMaxCharNameLength.
+     * @return the maximum length of any codepoint name
+     */
+    public int getMaxCharNameLength() 
+    {
+        if (initNameSetsLengths()) {
+            return m_maxNameLength_;
+        } 
+        else {
+            return 0;
+        }
+    }
+    
+    /**
+     * Gets the maximum length of any iso comments.
+     * Equivalent to uprv_getMaxISOCommentLength.
+     * @return the maximum length of any codepoint name
+     */
+    public int getMaxISOCommentLength() 
+    {
+        if (initNameSetsLengths()) {
+            return m_maxISOCommentLength_;
+        } 
+        else {
+            return 0;
+        }
+    }
+    
+    /**
+     * Fills set with characters that are used in Unicode character names.
+     * Equivalent to uprv_getCharNameCharacters.
+     * @param set USet to receive the converted contents of m_nameSet_
+     */
+    public void getCharNameCharacters(UnicodeSet set) 
+    {
+        convert(m_nameSet_, set);
+    }
+    
+    /**
+     * Fills set with characters that are used in ISO comments.
+     * Equivalent to uprv_getISOCommentCharacters.
+     * @param set USet to receive the converted contents of m_ISOCommentSet_
+     */
+    public void getISOCommentCharacters(UnicodeSet set) 
+    {
+        convert(m_ISOCommentSet_, set);
+    }
+    
+    // package private inner class --------------------------------------
+    
+    /**
+    * Algorithmic name class
+    */
+    static final class AlgorithmName
+    {
+        // package private data members ----------------------------------
+        
+        /**
+        * Constant type value of the different AlgorithmName
+        */
+        static final int TYPE_0_ = 0;
+        static final int TYPE_1_ = 1;
+        
+        // package private constructors ----------------------------------
+        
+        /**
+        * Empty constructor; setInfo, setFactor, setPrefix, setFactorString fill the fields
+        */
+        AlgorithmName()
+        {
+        }
+        
+        // package private methods ---------------------------------------
+        
+        /**
+        * Stores the range, type and variant used to build algorithmic names
+        * @param rangestart starting code point that lies within this name group
+        * @param rangeend end code point that lies within this name group
+        * @param type algorithm type. There's 2 kinds of algorithmic type. First 
+        *        which uses code point as part of its name and the other uses 
+        *        variant postfix strings
+        * @param variant algorithmic variant
+        * @return true if values are valid and stored, false if left untouched
+        */ 
+        boolean setInfo(int rangestart, int rangeend, byte type, byte variant)
+        {
+            if (rangestart < UCharacter.MIN_VALUE || rangestart > rangeend 
+                || rangeend > UCharacter.MAX_VALUE 
+                || (type != TYPE_0_ && type != TYPE_1_)) {
+                return false;
+            }
+            m_rangestart_ = rangestart;
+            m_rangeend_ = rangeend;
+            m_type_ = type;
+            m_variant_ = variant;
+            return true;
+        }
+        
+        /**
+        * Stores the factor data
+        * @param factor array of factors, its length has to equal the variant
+        * @return true if factors are valid
+        */
+        boolean setFactor(char factor[])
+        {
+            if (factor.length != m_variant_) {
+                return false;
+            }
+            m_factor_ = factor;
+            return true;
+        }
+        
+        /**
+        * Stores the name prefix
+        * @param prefix name prefix, rejected if null or empty
+        * @return true if prefix is set
+        */
+        boolean setPrefix(String prefix)
+        {
+            if (prefix == null || prefix.length() == 0) {
+                return false;
+            }
+            m_prefix_ = prefix;
+            return true;
+        }
+        
+        /**
+        * Sets the variant factorized name data 
+        * @param string variant factorized name data, stored without any check
+        * @return always true
+        */
+        boolean setFactorString(byte string[])
+        {
+            // factor and variant string can be empty for things like 
+            // Hangul code points
+            m_factorstring_ = string;
+            return true;
+        }
+      
+        /**
+        * Checks if code point lies in the inclusive range of this algorithm
+        * @param ch code point 
+        */
+        boolean contains(int ch)
+        {
+            return ch >= m_rangestart_ && ch <= m_rangeend_;
+        }
+        
+        /**
+        * Appends the prefix and the algorithmic name part of ch to str.
+        * Note this method does not check for validity of code point in Algorithm,
+        * result is undefined if code point does not belong in Algorithm.
+        * @param ch code point
+        * @param str StringBuffer to append to
+        */
+        void appendName(int ch, StringBuffer str)
+        {
+            str.append(m_prefix_);
+            switch (m_type_) 
+            {
+                case TYPE_0_: 
+                    // prefix followed by the code point in hex (presumably m_variant_ digits)
+                    Utility.hex(ch, m_variant_, str);
+                    break;
+                case TYPE_1_: 
+                    // prefix followed by factorized-elements
+                    int offset = ch - m_rangestart_;
+                    int indexes[] = m_utilIntBuffer_;
+                    int factor;
+                      
+                    // write elements according to the factors
+                    // the factorized elements are determined by modulo 
+                    // arithmetic, starting from the last factor
+                    synchronized (m_utilIntBuffer_) {
+                        for (int i = m_variant_ - 1; i > 0; i --) 
+                        {
+                            factor = m_factor_[i] & 0x00FF;
+                            indexes[i] = offset % factor;
+                            offset /= factor;
+                        }
+                          
+                        // we don't need to calculate the last modulus because 
+                        // start <= code <= end guarantees here that 
+                        // code <= factors[0]
+                        indexes[0] = offset;
+    
+                        // joining up the factorized strings 
+                        str.append(getFactorString(indexes, m_variant_));
+                    }
+                    break;
+            }
+        }
+        
+        /**
+        * Finds the code point in this range whose algorithmic name is name
+        * @return the matching code point, or -1 if name does not match
+        */
+        int getChar(String name)
+        {
+            // the name has to begin with the prefix of this algorithm
+            if (!name.startsWith(m_prefix_)) {
+                return -1;
+            }
+            int suffixStart = m_prefix_.length();
+                
+            if (m_type_ == TYPE_0_) {
+                // the suffix is the code point itself in hex digits
+                int parsed;
+                try
+                {
+                    parsed = Integer.parseInt(name.substring(suffixStart), 
+                                              16);
+                }
+                catch (NumberFormatException e)
+                {
+                    return -1;
+                }
+                // does it fit into the range?
+                if (m_rangestart_ <= parsed && parsed <= m_rangeend_) {
+                    return parsed;
+                }
+                return -1;
+            }
+            
+            if (m_type_ != TYPE_1_) {
+                return -1;
+            }
+                
+            // repetitive suffix name comparison done here: every code point
+            // of the range is factorized and its suffix compared with name
+            int indexes[] = m_utilIntBuffer_;
+            for (int ch = m_rangestart_; ch <= m_rangeend_; ch ++)
+            {
+                // offset is the character code - start
+                int remainder = ch - m_rangestart_;
+                  
+                synchronized (m_utilIntBuffer_) {
+                    // the factorized elements are determined by modulo 
+                    // arithmetic, last factor first
+                    for (int i = m_variant_ - 1; i > 0; i --) 
+                    {
+                        int factor = m_factor_[i] & 0x00FF;
+                        indexes[i] = remainder % factor;
+                        remainder /= factor;
+                    }
+                            
+                    // we don't need to calculate the last modulus 
+                    // because start <= code <= end guarantees here that 
+                    // code <= factors[0]
+                    indexes[0] = remainder;
+                    if (compareFactorString(indexes, m_variant_, name, 
+                                            suffixStart)) {
+                        return ch;
+                    }
+                }
+            }
+            return -1;
+        }
+        
+        /**
+         * Adds all chars in the set of algorithmic names into the set.
+         * Equivalent to part of calcAlgNameSetsLengths.
+         * @param set int set to add the chars of the algorithm names into
+         * @param maxlength maximum length to compare to
+         * @return the length that is either maxlength or the length of this
+         *         algorithm name if it is longer than maxlength
+         */
+        int add(int set[], int maxlength) 
+        {
+            // prefix length
+            int length = UCharacterName.add(set, m_prefix_);
+            switch (m_type_) {
+                case TYPE_0_ : {
+                    // name = prefix + (range->variant times) hex-digits
+                    // prefix
+                    length += m_variant_;
+                    /* synwee to check 
+                     * addString(set, (const char *)(range + 1))
+                                       + range->variant;*/
+                    break;
+                }
+                case TYPE_1_ : {
+                    // name = prefix factorized-elements
+                    // get the set and maximum factor suffix length for each 
+                    // factor, reading the suffixes of all factors consecutively
+                    int count = 0;
+                    for (int i = 0; i < m_variant_; i ++) 
+                    {
+                        int maxfactorlength = 0;
+                        for (int factor = m_factor_[i]; factor > 0; -- factor) {
+                            synchronized (m_utilStringBuffer_) {
+                                m_utilStringBuffer_.delete(0, 
+                                                m_utilStringBuffer_.length());
+                                count 
+                                  = UCharacterUtility.getNullTermByteSubString(
+                                                m_utilStringBuffer_, 
+                                                m_factorstring_, count);
+                                UCharacterName.add(set, m_utilStringBuffer_);
+                                if (m_utilStringBuffer_.length() 
+                                                            > maxfactorlength) 
+                                {
+                                    maxfactorlength 
+                                                = m_utilStringBuffer_.length();
+                                }
+                            }
+                        }
+                        length += maxfactorlength;
+                    }
+                }
+            }
+            if (length > maxlength) {
+                return length;
+            }
+            return maxlength;
+        }
+        
+        // private data members ------------------------------------------
+        
+        /**
+        * Algorithmic data information
+        */
+        private int m_rangestart_;
+        private int m_rangeend_;
+        private byte m_type_;
+        private byte m_variant_;
+        private char m_factor_[];
+        private String m_prefix_;
+        private byte m_factorstring_[];
+        /**
+         * Utility StringBuffer
+         */
+        private StringBuffer m_utilStringBuffer_ = new StringBuffer();
+        /**
+         * Utility int buffer
+         */
+        private int m_utilIntBuffer_[] = new int[256];
+        
+        // private methods -----------------------------------------------
+                
+        /**
+        * Concatenates, for every factor block, the string that the matching 
+        * entry of index selects within that block.
+        * @param index position of the string to pick, one entry per factor block
+        * @param length number of index entries in use, has to be the number of 
+        *        factor blocks
+        * @return the concatenated strings, or null if the arguments do not fit
+        */
+        private String getFactorString(int index[], int length)
+        {
+            int blockcount = m_factor_.length;
+            if (index == null || length != blockcount) {
+                return null;
+            }
+
+            int lastblock = blockcount - 1;
+            synchronized (m_utilStringBuffer_) {
+                m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
+                int position = 0;
+                for (int block = 0; block <= lastblock; block ++) {
+                    int picked = index[block];
+                    position = UCharacterUtility.skipNullTermByteSubString(
+                                            m_factorstring_, position, picked);
+                    position = UCharacterUtility.getNullTermByteSubString(
+                                  m_utilStringBuffer_, m_factorstring_, position);
+                    if (block < lastblock) {
+                        // step over the remaining strings of this block
+                        position = UCharacterUtility.skipNullTermByteSubString(
+                                  m_factorstring_, position, 
+                                  m_factor_[block] - picked - 1);
+                    }
+                }
+                return m_utilStringBuffer_.toString();
+            }
+        }
+        
+        /**
+        * Checks whether str, from offset to its end, is exactly the 
+        * concatenation of the factor strings selected by index.
+        * @param index position of the string to pick, one entry per factor 
+        *        block
+        * @param length number of index entries in use, has to be the number 
+        *        of factor blocks
+        * @param str string to compare with
+        * @param offset position in str at which the comparison starts
+        * @return true if the rest of str matches, false otherwise or if the 
+        *         arguments do not fit
+        */
+        private boolean compareFactorString(int index[], int length, String str, 
+                                            int offset)
+        {
+            int blockcount = m_factor_.length;
+            if (index == null || length != blockcount) {
+                return false;
+            }
+            int lastblock = blockcount - 1;
+            int position = 0;       // read position in m_factorstring_
+            int matched = offset;   // next unmatched position in str
+            for (int block = 0; block <= lastblock; block ++) {
+                int picked = index[block];
+                position = UCharacterUtility.skipNullTermByteSubString(
+                                            m_factorstring_, position, picked);
+                matched = UCharacterUtility.compareNullTermByteSubString(str, 
+                                           m_factorstring_, matched, position);
+                if (matched < 0) {
+                    return false;
+                }
+                if (block < lastblock) {
+                    // position still points at the picked string, so it is 
+                    // skipped together with the rest of its block
+                    position = UCharacterUtility.skipNullTermByteSubString(
+                                      m_factorstring_, position, 
+                                      m_factor_[block] - picked);
+                }
+            }
+            return matched == str.length();
+        }
+    }
+    
+    // package private data members --------------------------------------
+    
+    /**
+     * Size of each group
+     */
+    int m_groupsize_ = 0;
+    
+    // package private methods --------------------------------------------
+    
+    /**
+    * Installs the token table and the token strings of the name data.
+    * @param token array of tokens
+    * @param tokenstring array of string values of the tokens
+    * @return false if an array is null or empty, nothing is set in that case
+    */
+    boolean setToken(char token[], byte tokenstring[])
+    {
+        if (token == null || tokenstring == null || token.length == 0 
+            || tokenstring.length == 0) {
+            return false;
+        }
+        m_tokentable_ = token;
+        m_tokenstring_ = tokenstring;
+        return true;
+    }
+        
+    /**
+    * Installs the array describing the algorithmic name ranges.
+    * @param alg algorithmic name information array
+    * @return false if alg is null or empty, nothing is set in that case
+    */
+    boolean setAlgorithm(AlgorithmName alg[])
+    {
+        if (alg == null || alg.length == 0) {
+            return false;
+        }
+        m_algorithm_ = alg;
+        return true;
+    }
+    
+    /**
+    * Sets the number of groups and the size of each group in number of char
+    * @param count number of groups
+    * @param size size of group in char
+    * @return false if count or size is not positive, nothing is set then
+    */
+    boolean setGroupCountSize(int count, int size)
+    {
+        boolean valid = count > 0 && size > 0;
+        if (valid) {
+            m_groupcount_ = count;
+            m_groupsize_ = size;
+        }
+        return valid;
+    }
+      
+    /**
+    * Installs the group information array and the group strings.
+    * @param group index information array
+    * @param groupstring name information array
+    * @return false if an array is null or empty, nothing is set in that case
+    */
+    boolean setGroup(char group[], byte groupstring[])
+    {
+        if (group == null || groupstring == null || group.length == 0 
+            || groupstring.length == 0) {
+            return false;
+        }
+        m_groupinfo_ = group;
+        m_groupstring_ = groupstring;
+        return true;
+    }
+    
+    // private data members ----------------------------------------------
+    
+    /**
+    * Data used in unames.icu
+    */
+    private char m_tokentable_[];
+    private byte m_tokenstring_[];
+    private char m_groupinfo_[];
+    private byte m_groupstring_[];
+    private AlgorithmName m_algorithm_[];
+      
+    /**
+    * Group use
+    */
+    private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1];
+    private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1];
+      	 
+    /**
+    * Default name of the name datafile
+    */
+    private static final String NAME_FILE_NAME_ = 
+                                           "/com/ibm/icu/impl/data/unames.icu";
+    /**
+    * Shift count to retrieve group information
+    */
+    private static final int GROUP_SHIFT_ = 5;
+    /**
+    * Mask to retrieve the offset for a particular character within a group
+    */
+    private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1;
+    /**
+    * Default buffer size of datafile
+    */
+    private static final int NAME_BUFFER_SIZE_ = 100000;
+      
+    /**
+    * Position of offsethigh in group information array
+    */
+    private static final int OFFSET_HIGH_OFFSET_ = 1;
+      
+    /**
+    * Position of offsetlow in group information array
+    */
+    private static final int OFFSET_LOW_OFFSET_ = 2;
+    /**
+    * Double nibble indicator, any nibble > this number has to be combined
+    * with its following nibble
+    */
+    private static final int SINGLE_NIBBLE_MAX_ = 11;
+     
+    /** 
+     * Maximum length of character names (regular & 1.0). 
+     */ 
+    private static int MAX_NAME_LENGTH_ = 0; 
+    /**
+     * Maximum length of ISO comments. 
+     */
+    private static int MAX_ISO_COMMENT_LENGTH_ = 0; 
+    
+    /** 
+     * Set of chars used in character names (regular & 1.0). 
+     * Chars are platform-dependent (can be EBCDIC). 
+     */ 
+    private int m_nameSet_[] = new int[8]; 
+    /**
+     * Set of chars used in ISO comments. 
+     * Chars are platform-dependent (can be EBCDIC). 
+     */
+    private int m_ISOCommentSet_[] = new int[8]; 
+    /**
+     * Utility StringBuffer
+     */
+    private StringBuffer m_utilStringBuffer_ = new StringBuffer();
+    /**
+     * Utility int buffer
+     */
+    private int m_utilIntBuffer_[] = new int[2];
+    /**
+     * Maximum ISO comment length
+     */
+    private int m_maxISOCommentLength_;
+    /**
+     * Maximum name length
+     */
+    private int m_maxNameLength_;
+    /**
+     * Singleton instance
+     */
+    private static UCharacterName INSTANCE_ = null;
+    /**
+     * Type names used for extended names
+     */
+    private static final String TYPE_NAMES_[] = {"unassigned",
+                                                 "uppercase letter",
+                                                 "lowercase letter",
+                                                 "titlecase letter",
+                                                 "modifier letter",
+                                                 "other letter",
+                                                 "non spacing mark",
+                                                 "enclosing mark",
+                                                 "combining spacing mark",
+                                                 "decimal digit number",
+                                                 "letter number",
+                                                 "other number",
+                                                 "space separator",
+                                                 "line separator",
+                                                 "paragraph separator",
+                                                 "control",
+                                                 "format",
+                                                 "private use area",
+                                                 "surrogate",
+                                                 "dash punctuation",   
+                                                 "start punctuation",
+                                                 "end punctuation",
+                                                 "connector punctuation",
+                                                 "other punctuation",
+                                                 "math symbol",
+                                                 "currency symbol",
+                                                 "modifier symbol",
+                                                 "other symbol",
+                                                 "initial punctuation",
+                                                 "final punctuation",
+                                                 "noncharacter",
+                                                 "lead surrogate",
+                                                 "trail surrogate"};
+    /**
+     * Unknown type name
+     */
+    private static final String UNKNOWN_TYPE_NAME_ = "unknown";
+    /**
+     * Not a character type
+     */
+    private static final int NON_CHARACTER_ 
+                                    = UCharacterCategory.CHAR_CATEGORY_COUNT;
+    /**
+    * Lead surrogate type
+    */
+    private static final int LEAD_SURROGATE_ 
+                                  = UCharacterCategory.CHAR_CATEGORY_COUNT + 1;
+    /**
+    * Trail surrogate type
+    */
+    private static final int TRAIL_SURROGATE_ 
+                                  = UCharacterCategory.CHAR_CATEGORY_COUNT + 2;
+    /**
+    * Extended category count
+    */
+    static final int EXTENDED_CATEGORY_
+                                  = UCharacterCategory.CHAR_CATEGORY_COUNT + 3;
+   
+    // private constructor ------------------------------------------------
+    
+    /**
+    * <p>Private constructor, reads the name data file NAME_FILE_NAME_.</p>
+    * @exception IOException thrown when data reading fails
+    */
+    private UCharacterName() throws IOException
+    {
+        BufferedInputStream b = new BufferedInputStream(
+            getClass().getResourceAsStream(NAME_FILE_NAME_), NAME_BUFFER_SIZE_);
+        // closing the buffered stream closes the resource stream as well; 
+        // do it in finally so that a failed read does not leak the stream
+        try {
+            new UCharacterNameReader(b).read(this);
+        } finally { b.close(); }
+    }
+    
+    // private methods ---------------------------------------------------
+      
+    /**
+    * Gets the name of a character whose name is generated by rule.
+    * @param ch character to determine name for
+    * @param choice name choice
+    * @return the algorithmic name, null for Unicode 1.0 names or if not found
+    */
+    private String getAlgName(int ch, int choice) 
+    {
+        // Do not write algorithmic Unicode 1.0 names because Unihan names are 
+        // the same as the modern ones, extension A was only introduced with 
+        // Unicode 3.0, and the Hangul syllable block was moved and changed 
+        // around Unicode 1.1.5.
+        if (choice == UCharacterNameChoice.UNICODE_10_CHAR_NAME) {
+            return null;
+        }
+        synchronized (m_utilStringBuffer_) {
+            m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
+            int index = m_algorithm_.length;
+            while (-- index >= 0) {
+                AlgorithmName range = m_algorithm_[index];
+                if (range.contains(ch)) {
+                    range.appendName(ch, m_utilStringBuffer_);
+                    return m_utilStringBuffer_.toString();
+                }
+            }
+        }
+        return null;
+    }
+      
+    /**
+    * Searches every group for a character with the tokenized argument name
+    * @param name of the character
+    * @param choice name choice, passed on to the search within a group
+    * @return character with the tokenized argument name or -1 if character
+    *         is not found
+    */
+    private synchronized int getGroupChar(String name, int choice) 
+    {
+        for (int group = 0; group < m_groupcount_; group ++) {
+            // getGroupLengths fills the shared offset and length buffers 
+            // for the lines of this group
+            int stringindex = getGroupLengths(group, m_groupoffsets_, 
+                                              m_grouplengths_);
+            int line = getGroupChar(stringindex, m_grouplengths_, name, 
+                                    choice);
+            if (line != -1) {
+                // first entry of the group information is the group's msb
+                int msb = m_groupinfo_[group * m_groupsize_];
+                return (msb << GROUP_SHIFT_) | line;
+            }
+        }
+        return -1;
+    }
+      
+    /**
+    * Searches the lines of one group for the argument name, comparing the 
+    * name with the tokens and the literal bytes of each line in turn.
+    * @param index index where the set of names reside in the group block
+    * @param length list of lengths of the strings
+    * @param name character name to search for
+    * @param choice of either 1.0 or the most current unicode name
+    * @return relative character in the group which matches name, otherwise 
+    *         if not found, -1 will be returned
+    */
+    private int getGroupChar(int index, char length[], String name, 
+                             int choice)
+    { 
+        int b = 0; 
+        char token;
+        int len;
+        int namelen = name.length();
+        int nindex;
+        int count;
+        
+        for (int result = 0; result <= LINES_PER_GROUP_; result ++) {
+            nindex = 0;
+            len = length[result];
+            if (choice == UCharacterNameChoice.UNICODE_10_CHAR_NAME) {
+                // the modern name is the first field of a line, skip it
+                int oldindex = index;
+                index += UCharacterUtility.skipByteSubString(m_groupstring_, 
+                                                     index, len, (byte)';');
+                len -= (index - oldindex);
+            }
+                
+            // number of tokens is > the length of the name
+            // write each letter directly, and write a token word per token
+            for (count = 0; count < len && nindex != -1 && nindex < namelen;
+                ) {
+                // bytes are unsigned data: unmasked, a byte above 0x7F is 
+                // negative and can index the token table out of bounds
+                b = m_groupstring_[index + count] & 0xFF;
+                count ++;
+                if (b >= m_tokentable_.length) {
+                    if (name.charAt(nindex ++) != b) {
+                        nindex = -1;
+                    }
+                }
+                else {
+                    token = m_tokentable_[b];
+                    if (token == 0xFFFE) {
+                        // this is a lead byte for a double-byte token
+                        token = m_tokentable_[b << 8 | 
+                                   (m_groupstring_[index + count] & 0x00ff)];
+                        count ++;
+                    }
+                    if (token == 0xFFFF) {
+                        if (name.charAt(nindex ++) != b) {
+                            nindex = -1;
+                        }
+                    }
+                    else {
+                        // compare token with name
+                        nindex = UCharacterUtility.compareNullTermByteSubString(
+                                        name, m_tokenstring_, nindex, token);
+                    }
+                }
+            }
+
+            if (namelen == nindex && 
+                (count == len || m_groupstring_[index + count] == ';')) {
+                return result;
+            }
+            index += len;
+        }
+        return -1;
+    }
+       
+    /**
+    * Binary search for the group strings set that contains the argument 
+    * Unicode code point's most significant bits.
+    * The group information array is searched for the group whose first 
+    * entry is the most significant bits of ch.
+    * @param ch the code point to look for
+    * @return group string set index in datatable, put together from the 
+    *         high and low offset entries of the group, otherwise -1 is 
+    *         returned if group string set is not found
+    */
+    private int getGroupStringIndex(int ch)
+    {
+        int msb = ch >> GROUP_SHIFT_;
+        int low = 0;
+        int high = m_groupcount_;
+            
+        // narrow [low, high) down to a single candidate group
+        while (low < high - 1) {
+            int middle = (low + high) >> 1;
+            if (msb < m_groupinfo_[middle * m_groupsize_]) {
+                high = middle;
+            }
+            else {
+                low = middle;
+            }
+        }
+
+        // the candidate only counts if it is an exact match
+        int entry = low * m_groupsize_;
+        if (msb != m_groupinfo_[entry]) {
+            return -1;
+        }
+        // the index is spread over two chars of the group information
+        return UCharacterUtility.toInt(
+                                m_groupinfo_[entry + OFFSET_HIGH_OFFSET_], 
+                                m_groupinfo_[entry + OFFSET_LOW_OFFSET_]);
+    }
+    
+    /**
+    * Gets the name of the character from the group name data
+    * @param ch character to get the name of
+    * @param choice name choice selector to choose a unicode 1.0 or newer name
+    * @return the name, or null if the group found for ch is not the group 
+    *         of the most significant bits of ch
+    */
+    private String getGroupName(int ch, int choice) 
+    {
+        int msb = getCodepointMSB(ch);
+        int group = getGroup(ch);
+
+        // the group found only counts if it is an exact match
+        if (msb != m_groupinfo_[group * m_groupsize_]) {
+            return null;
+        }
+        // fill the shared buffers with the line offsets and lengths
+        int index = getGroupLengths(group, m_groupoffsets_, m_grouplengths_);
+        int line = ch & GROUP_MASK_;
+        return getGroupName(index + m_groupoffsets_[line], 
+                            m_grouplengths_[line], choice);
+    }
+    
+    /**
+    * Gets the extended type of a character, which is the type reported by 
+    * UCharacter except that noncharacters and the two kinds of surrogates 
+    * are given types of their own.
+    * @param ch character to be tested
+    * @return extended type it is associated with
+    */
+    private static int getType(int ch)
+    {
+        if (UCharacterUtility.isNonCharacter(ch)) {
+            return NON_CHARACTER_;
+        }
+        int category = UCharacter.getType(ch);
+        if (category != UCharacterCategory.SURROGATE) {
+            return category;
+        }
+        // split the surrogates: everything up to the last lead surrogate 
+        // value is reported as a lead surrogate, anything above it as a 
+        // trail surrogate
+        return ch <= UTF16.LEAD_SURROGATE_MAX_VALUE ? LEAD_SURROGATE_
+                                                    : TRAIL_SURROGATE_;
+    }
+    
+    /**
+    * Getting the character with extended name of the form <....>.
+    * @param name of the character to be found
+    * @param choice name choice
+    * @return character associated with the name, -1 if such character is not
+    *         found and -2 if name does not start with '<', in which case 
+    *         the search should continue.
+    */
+    private static int getExtendedChar(String name, int choice)
+    {
+        if (name.charAt(0) != '<') {
+            // not of the extended form, leave it to the other searches
+            return -2;
+        }
+        if (choice != UCharacterNameChoice.EXTENDED_CHAR_NAME) {
+            return -1;
+        }
+        int closing = name.length() - 1;
+        if (name.charAt(closing) != '>') {
+            return -1;
+        }
+        // the hex digits of the code point follow the last '-'
+        int dash = name.lastIndexOf('-');
+        if (dash < 0) {
+            return -1;
+        }
+        int result;
+        try {
+            result = Integer.parseInt(name.substring(dash + 1, closing), 16);
+        }
+        catch (NumberFormatException e) {
+            return -1;
+        }
+        // Now validate the category name. We could use a binary search, or 
+        // a trie, if we really wanted to. 
+        String type = name.substring(1, dash);
+        for (int i = 0; i < TYPE_NAMES_.length; ++ i) {
+            if (type.equals(TYPE_NAMES_[i])) {
+                // the name is only valid for a code point of that very type
+                return getType(result) == i ? result : -1;
+            }
+        }
+        return -1;
+    }
+    
+    // sets of name characters, maximum name lengths -----------------------
+
+    /**
+     * Adds a char to a bit set kept in ints. Equivalent to SET_ADD.
+     * @param set set to add to
+     * @param ch 16 bit char to add
+     */
+    private static void add(int set[], char ch) 
+    {
+        int flag = 1 << (ch & 0x1f);
+        set[ch >>> 5] = set[ch >>> 5] | flag;
+    }
+
+    /**
+     * Checks if a char is part of a bit set kept in ints.
+     * Equivalent to SET_CONTAINS.
+     * @param set set to check in
+     * @param ch 16 bit char to check
+     * @return true if ch is part of the set, false otherwise
+     */
+    private static boolean contains(int set[], char ch) 
+    {
+        return ((set[ch >>> 5] >>> (ch & 0x1f)) & 1) != 0; 
+    }
+
+    /**
+     * Adds all chars of str to the set. Equivalent to calcStringSetLength.
+     * @param set set to add all chars of str to
+     * @param str string to add
+     * @return length of str
+     */
+    private static int add(int set[], String str) 
+    {
+        int length = str.length();
+        int i = length;
+        while (-- i >= 0) {
+            add(set, str.charAt(i));
+        }
+        return length;
+    }
+    
+    /**
+     * Adds all chars of str to the set. Equivalent to calcStringSetLength.
+     * @param set set to add all chars of str to
+     * @param str string buffer whose chars are added
+     * @return length of str
+     */
+    private static int add(int set[], StringBuffer str) 
+    {
+        int length = str.length();
+        int i = length;
+        while (-- i >= 0) {
+            add(set, str.charAt(i));
+        }
+        return length;
+    }
+    
+    /**
+     * Adds all algorithmic names into the name set.
+     * Equivalent to part of calcAlgNameSetsLengths.
+     * @param maxlength length to compare to 
+     * @return the maximum length of any possible algorithmic name if it is > 
+     *         maxlength, otherwise maxlength is returned.
+     */
+    private int addAlgorithmName(int maxlength)
+    {
+        int longest = maxlength;
+        for (int i = m_algorithm_.length - 1; i >= 0; i --) {
+            // add returns the larger of its argument and the longest name 
+            // of the range
+            longest = Math.max(longest, 
+                               m_algorithm_[i].add(m_nameSet_, longest));
+        }
+        return longest;
+    }
+    
+    /**
+     * Adds all extended names into the name set.
+     * Equivalent to part of calcExtNameSetsLengths.
+     * @param maxlength length to compare to 
+     * @return the maxlength of any possible extended name.
+     */
+    private int addExtendedName(int maxlength) 
+    {
+        // an extended name is the category name plus 9 chars:
+        // 2 for <>
+        // 1 for -
+        // 6 for most hex digits per code point
+        final int extra = 9;
+        int longest = maxlength;
+        int i = TYPE_NAMES_.length;
+        while (-- i >= 0) {
+            int length = extra + add(m_nameSet_, TYPE_NAMES_[i]);
+            longest = Math.max(longest, length);
+        }
+        return longest;
+    }
+    
+    /**
+     * Adds the chars of one ';' terminated field of a group line to the 
+     * argument set. Equivalent to calcNameSetLength.
+     * @param offset of the group name string in byte count
+     * @param length of the group name string
+     * @param tokenlength cache of token lengths, filled in as tokens are met
+     * @param set to add to
+     * @return the shared m_utilIntBuffer_: [0] length of the name string, 
+     *         [1] length of the group string parsed
+     */
+    private int[] addGroupName(int offset, int length, byte tokenlength[], 
+                               int set[]) 
+    {
+        int resultnlength = 0;
+        int resultplength = 0;
+        while (resultplength < length) {
+            char b = (char)(m_groupstring_[offset + resultplength] & 0xff);
+            resultplength ++;
+            if (b == ';') {
+                break;
+            }
+            // values beyond the token table stand for themselves
+            if (b >= m_tokentable_.length) {
+                add(set, b); // implicit letter
+                resultnlength ++;
+            }
+            else {
+                char token = m_tokentable_[b & 0x00ff];
+                if (token == 0xFFFE) {
+                    // this is a lead byte for a double-byte token
+                    b = (char)(b << 8 | (m_groupstring_[offset + resultplength] 
+                                         & 0x00ff));
+                    token = m_tokentable_[b];
+                    resultplength ++;
+                }
+                if (token == 0xFFFF) {
+                    add(set, b);
+                    resultnlength ++;
+                }
+                else { 
+                    // count token word, using the cached token length; a 
+                    // cached 0 means the token still has to be expanded
+                    byte tlength = tokenlength[b];
+                    if (tlength == 0) {
+                        synchronized (m_utilStringBuffer_) {
+                            m_utilStringBuffer_.delete(0, 
+                                                 m_utilStringBuffer_.length());
+                            UCharacterUtility.getNullTermByteSubString(
+                                           m_utilStringBuffer_, m_tokenstring_,
+                                           token);
+                            tlength = (byte)add(set, m_utilStringBuffer_);
+                        }
+                        tokenlength[b] = tlength;
+                    }
+                    resultnlength += tlength;
+                }
+            }
+        }
+        m_utilIntBuffer_[0] = resultnlength; // length of the name string
+        m_utilIntBuffer_[1] = resultplength; // group string bytes parsed
+        return m_utilIntBuffer_;
+    }
+    
+    /**
+     * Adds the names of all groups to the name set and their ISO comments 
+     * to the ISO comment set. Sets the data member m_max*Length_.
+     * Method called only once.
+     * Equivalent to calcGroupNameSetsLength.
+     * @param maxlength length to compare to 
+     */
+    private void addGroupName(int maxlength) 
+    {
+        int maxisolength = 0;
+        char offsets[] = new char[LINES_PER_GROUP_ + 2];
+        char lengths[] = new char[LINES_PER_GROUP_ + 2];
+        byte tokenlengths[] = new byte[m_tokentable_.length];
+       
+        // enumerate all groups
+        for (int i = 0; i < m_groupcount_ ; i ++) {
+            int offset = getGroupLengths(i, offsets, lengths);
+            // enumerate all lines in each group
+            for (int linenumber = 0; linenumber < LINES_PER_GROUP_;
+                linenumber ++) {
+                int lineoffset = offset + offsets[linenumber];
+                int length = lengths[linenumber];
+                if (length == 0) {
+                    continue;
+                }
+    
+                // each call hands back the same shared buffer: [0] length 
+                // of the expanded field, [1] group string bytes parsed
+                // read regular name
+                int parsed[] = addGroupName(lineoffset, length, tokenlengths, 
+                                            m_nameSet_);
+                if (parsed[0] > maxlength) {
+                    // 0 for name length
+                    maxlength = parsed[0];
+                }
+                lineoffset += parsed[1];
+                if (parsed[1] >= length) {
+                    // 1 for parsed group string length
+                    continue;
+                }
+                length -= parsed[1];
+                // read Unicode 1.0 name
+                parsed = addGroupName(lineoffset, length, tokenlengths, 
+                                      m_nameSet_);
+                if (parsed[0] > maxlength) {
+                    // 0 for name length
+                    maxlength = parsed[0];
+                }
+                lineoffset += parsed[1];
+                if (parsed[1] >= length) {
+                    // 1 for parsed group string length
+                    continue;
+                }
+                length -= parsed[1];
+                // read ISO comment, its maximum is taken from the expanded 
+                // length just like the maximum of the names
+                parsed = addGroupName(lineoffset, length, tokenlengths, 
+                                      m_ISOCommentSet_);
+                if (parsed[0] > maxisolength) {
+                    maxisolength = parsed[0];
+                }
+            }
+        }
+    
+        // set gMax... - name length last for threading
+        m_maxISOCommentLength_ = maxisolength;
+        m_maxNameLength_ = maxlength;
+    }
+    
+    /**
+     * Sets up the name sets and the calculation of the maximum lengths.
+     * Equivalent to calcNameSetsLengths.
+     * @return true, always
+     */
+    private boolean initNameSetsLengths() 
+    {
+        if (m_maxNameLength_ > 0) {
+            return true;
+        }
+    
+        String extra = "0123456789ABCDEF<>-";
+        // set hex digits, used in various names, and <>-, used in extended 
+        // names
+        for (int i = extra.length() - 1; i >= 0; i --) {
+            add(m_nameSet_, extra.charAt(i));
+        }
+        // The running maximum is kept in a local: m_maxNameLength_ is the 
+        // "already set up" flag tested above and must not become positive 
+        // before the sets are complete. addGroupName stores the result.
+        int maxlength = addAlgorithmName(0);
+        maxlength = addExtendedName(maxlength);
+        addGroupName(maxlength);
+        return true;
+    }
+    
+    /**
+     * Converts the char set cset into a Unicode set uset.
+     * Equivalent to charSetToUSet.
+     * @param set Set of 256 bit flags corresponding to a set of chars.
+     * @param uset USet to receive characters. Existing contents are deleted.
+     */
+    private void convert(int set[], UnicodeSet uset) 
+    {
+        uset.clear();
+        if (initNameSetsLengths()) {
+            // copy the flagged chars, from 255 down to 1, over to uset
+            char c = 255;
+            while (c > 0) {
+                if (contains(set, c)) {
+                    uset.add(c);
+                }
+                c --;
+            }
+        }
+    }
+}
diff --git a/icu4j/src/com/ibm/icu/lang/UCharacterNameChoice.java b/icu4j/src/com/ibm/icu/impl/UCharacterNameChoice.java
old mode 100755
new mode 100644
similarity index 73%
rename from icu4j/src/com/ibm/icu/lang/UCharacterNameChoice.java
rename to icu4j/src/com/ibm/icu/impl/UCharacterNameChoice.java
index 537dd831216..5b8200f35dc
--- a/icu4j/src/com/ibm/icu/lang/UCharacterNameChoice.java
+++ b/icu4j/src/com/ibm/icu/impl/UCharacterNameChoice.java
@@ -6,13 +6,13 @@
 *
 * $Source: 
 *     /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterNameChoiceEnum.java $ 
-* $Date: 2002/02/16 03:05:57 $ 
-* $Revision: 1.4 $
+* $Date: 2002/09/19 21:19:04 $ 
+* $Revision: 1.1 $
 *
 *******************************************************************************
 */
 
-package com.ibm.icu.lang;
+package com.ibm.icu.impl;
 
 /**
 * Internal class containing selector constants for the unicode character names.
@@ -24,12 +24,13 @@ package com.ibm.icu.lang;
 * @since oct0600
 */
 
-interface UCharacterNameChoice
+public interface UCharacterNameChoice
 {
   // public variables =============================================
   
-  static final int U_UNICODE_CHAR_NAME = 0;
-  static final int U_UNICODE_10_CHAR_NAME = 1;
-  static final int U_EXTENDED_CHAR_NAME = 2;
-  static final int U_CHAR_NAME_CHOICE_COUNT = 3;
+  static final int UNICODE_CHAR_NAME = 0;
+  static final int UNICODE_10_CHAR_NAME = 1;
+  static final int EXTENDED_CHAR_NAME = 2;
+  static final int CHAR_NAME_CHOICE_COUNT = 3;
+  static final int ISO_COMMENT_ = CHAR_NAME_CHOICE_COUNT;
 }
diff --git a/icu4j/src/com/ibm/icu/lang/UCharacterNameReader.java b/icu4j/src/com/ibm/icu/impl/UCharacterNameReader.java
old mode 100755
new mode 100644
similarity index 97%
rename from icu4j/src/com/ibm/icu/lang/UCharacterNameReader.java
rename to icu4j/src/com/ibm/icu/impl/UCharacterNameReader.java
index 22471ba057e..320a2d20722
--- a/icu4j/src/com/ibm/icu/lang/UCharacterNameReader.java
+++ b/icu4j/src/com/ibm/icu/impl/UCharacterNameReader.java
@@ -4,13 +4,13 @@
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
-* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/Attic/UCharacterNameReader.java,v $ 
-* $Date: 2002/08/01 19:50:26 $ 
-* $Revision: 1.11 $
+* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/UCharacterNameReader.java,v $ 
+* $Date: 2002/09/19 21:19:04 $ 
+* $Revision: 1.1 $
 *
 *******************************************************************************
 */
-package com.ibm.icu.lang;
+package com.ibm.icu.impl;
 
 import java.io.InputStream;
 import java.io.DataInputStream;
diff --git a/icu4j/src/com/ibm/icu/lang/UCharacterUtil.java b/icu4j/src/com/ibm/icu/impl/UCharacterUtility.java
old mode 100755
new mode 100644
similarity index 72%
rename from icu4j/src/com/ibm/icu/lang/UCharacterUtil.java
rename to icu4j/src/com/ibm/icu/impl/UCharacterUtility.java
index 88a42ac5277..c68f4bf70c8
--- a/icu4j/src/com/ibm/icu/lang/UCharacterUtil.java
+++ b/icu4j/src/com/ibm/icu/impl/UCharacterUtility.java
@@ -4,13 +4,13 @@
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
-* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/Attic/UCharacterUtil.java,v $ 
-* $Date: 2002/07/22 23:28:21 $ 
-* $Revision: 1.6 $
+* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/UCharacterUtility.java,v $ 
+* $Date: 2002/09/19 21:19:04 $ 
+* $Revision: 1.1 $
 *
 *******************************************************************************
 */
-package com.ibm.icu.lang;
+package com.ibm.icu.impl;
 
 /**
 * Internal character utility class for simple data type conversion and String 
@@ -19,18 +19,26 @@ package com.ibm.icu.lang;
 * @since sep2900
 */
 
-final class UCharacterUtil
+public final class UCharacterUtility
 {
-    // constructor =====================================================
-      
+    // public methods -----------------------------------------------------
+    
     /**
-    * private constructor to avoid initialisation
+    * Determines if codepoint is a non character
+    * @param ch codepoint
+    * @return true if codepoint is a non character false otherwise
     */
-    private UCharacterUtil()
+    public static boolean isNonCharacter(int ch) 
     {
+        if ((ch & NON_CHARACTER_SUFFIX_MIN_3_0_) == 
+                                            NON_CHARACTER_SUFFIX_MIN_3_0_) {
+            return true; // low 16 bits are 0xFFFE or 0xFFFF, rest not examined
+        }
+        // otherwise only the range 0xFDD0..0xFDEF counts as non character
+        return ch >= NON_CHARACTER_MIN_3_1_ && ch <=  NON_CHARACTER_MAX_3_1_;
     }
-      
-    // protected methods ===============================================
+    
+    // package private methods ---------------------------------------------
       
     /**
     * joining 2 chars to form an int
@@ -38,7 +46,7 @@ final class UCharacterUtil
     * @param lsc least significant char
     * @return int form
     */
-    protected static int toInt(char msc, char lsc)
+    static int toInt(char msc, char lsc)
     {
         return ((msc << 16) | lsc);
     }
@@ -49,7 +57,7 @@ final class UCharacterUtil
     * @param lsb the least significant byte
     * @return char form
     */
-    protected static char toChar(byte msb, byte lsb)
+    static char toChar(byte msb, byte lsb)
     {
         return (char)((msb << 8) | (lsb & 0xFF));
     }
@@ -65,7 +73,7 @@ final class UCharacterUtil
     * @param index to start substring in byte count
     * @return the end position of the substring within the character array
     */
-    protected static int getNullTermByteSubString(StringBuffer str, byte[] array, 
+    static int getNullTermByteSubString(StringBuffer str, byte[] array, 
                                                   int index)
     {
         byte b = 1;
@@ -93,7 +101,7 @@ final class UCharacterUtil
     * @return the end position of the substring within str if matches otherwise 
     *         a -1
     */
-    protected static int compareNullTermByteSubString(String str, byte[] array, 
+    static int compareNullTermByteSubString(String str, byte[] array, 
                                                       int strindex, int aindex)
     {
         byte b = 1;
@@ -127,7 +135,7 @@ final class UCharacterUtil
     * @param skipcount number of null terminated substrings to skip
     * @return the end position of the substrings within the character array
     */
-    protected static int skipNullTermByteSubString(byte[] array, int index, 
+    static int skipNullTermByteSubString(byte[] array, int index, 
                                                    int skipcount)
     {
         byte b;
@@ -154,7 +162,7 @@ final class UCharacterUtil
      * @param skipend value of byte to skip to
      * @return the number of bytes skipped
      */
-    protected static int skipByteSubString(byte[] array, int index, int length, 
+    static int skipByteSubString(byte[] array, int index, int length, 
                                            byte skipend)
     {
         int result;
@@ -172,5 +180,30 @@ final class UCharacterUtil
         
         return result;
     }
+    
+    // private data member --------------------------------------------------
+    
+    /**
+    * Minimum suffix value that indicates if a character is non character.
+    * Unicode 3.0 non characters
+    */
+    private static final int NON_CHARACTER_SUFFIX_MIN_3_0_ = 0xFFFE;
+    /**
+    * New minimum non character in Unicode 3.1
+    */
+    private static final int NON_CHARACTER_MIN_3_1_ = 0xFDD0;
+    /**
+    * New maximum non character in Unicode 3.1
+    */
+    private static final int NON_CHARACTER_MAX_3_1_ = 0xFDEF;
+    
+    // private constructor --------------------------------------------------
+      
+    /**
+    * Private constructor, prevents instantiation of this static utility class
+    */
+    private UCharacterUtility()
+    {
+    }
 }
 
diff --git a/icu4j/src/com/ibm/icu/lang/UCharacter.java b/icu4j/src/com/ibm/icu/lang/UCharacter.java
index 95fe2504a6c..11019ca79a1 100755
--- a/icu4j/src/com/ibm/icu/lang/UCharacter.java
+++ b/icu4j/src/com/ibm/icu/lang/UCharacter.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacter.java,v $ 
-* $Date: 2002/09/11 00:12:39 $ 
-* $Revision: 1.46 $
+* $Date: 2002/09/19 21:18:14 $ 
+* $Revision: 1.47 $
 *
 *******************************************************************************
 */
@@ -21,6 +21,9 @@ import com.ibm.icu.util.VersionInfo;
 import com.ibm.icu.text.BreakIterator;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.impl.NormalizerImpl;
+import com.ibm.icu.impl.UCharacterUtility;
+import com.ibm.icu.impl.UCharacterName;
+import com.ibm.icu.impl.UCharacterNameChoice;
 
 /**
 * <p>
@@ -842,7 +845,7 @@ public final class UCharacter
         if (ch <= UTF16.SURROGATE_MAX_VALUE) {
             return false;
         }
-        if (isNonCharacter(ch)) {
+        if (UCharacterUtility.isNonCharacter(ch)) {
             return false;
         }
         return (ch <= MAX_VALUE);
@@ -898,7 +901,7 @@ public final class UCharacter
     */
     public static String getName(int ch)
     {
-        return NAME_.getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);
+        return NAME_.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
     }
       
     /**
@@ -914,7 +917,7 @@ public final class UCharacter
     public static String getName1_0(int ch)
     {
         return NAME_.getName(ch, 
-                             UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
+                             UCharacterNameChoice.UNICODE_10_CHAR_NAME);
     }
     
     /**
@@ -937,7 +940,22 @@ public final class UCharacter
     */
     public static String getExtendedName(int ch) 
     {
-        return NAME_.getName(ch, UCharacterNameChoice.U_EXTENDED_CHAR_NAME);
+        return NAME_.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
+    }
+    
+    /**
+     * Gets the ISO 10646 comment for a character.
+     * The ISO 10646 comment is an informative field in the Unicode Character
+     * Database (UnicodeData.txt field 11) and is from the ISO 10646 names list.
+     * @param ch The code point for which to get the ISO comment.
+     *           It must be <code>0&lt;=ch&lt;=0x10ffff</code>.
+     * @return The ISO comment, or null if there is no comment for this 
+     *         character.
+     * @draft ICU 2.4
+     */
+    public static String getISOComment(int ch)
+    {
+        return NAME_.getName(ch, UCharacterNameChoice.ISO_COMMENT_);
     }
       
     /**
@@ -952,7 +970,7 @@ public final class UCharacter
     public static int getCharFromName(String name)
     {
         return NAME_.getCharFromName(
-                            UCharacterNameChoice.U_UNICODE_CHAR_NAME, name);
+                            UCharacterNameChoice.UNICODE_CHAR_NAME, name);
     }
       
     /**
@@ -967,7 +985,7 @@ public final class UCharacter
     public static int getCharFromName1_0(String name)
     {
         return NAME_.getCharFromName(
-                         UCharacterNameChoice.U_UNICODE_10_CHAR_NAME, name);
+                         UCharacterNameChoice.UNICODE_10_CHAR_NAME, name);
     }
     
     /**
@@ -992,7 +1010,7 @@ public final class UCharacter
     public static int getCharFromExtendedName(String name)
     {
         return NAME_.getCharFromName(
-                            UCharacterNameChoice.U_EXTENDED_CHAR_NAME, name);
+                            UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
     }
       
     /**
@@ -1462,7 +1480,7 @@ public final class UCharacter
     public static ValueIterator getNameIterator()
     {
         return new UCharacterNameIterator(NAME_,
-                                   UCharacterNameChoice.U_UNICODE_CHAR_NAME);
+                                   UCharacterNameChoice.UNICODE_CHAR_NAME);
     }
     
     /**
@@ -1487,7 +1505,7 @@ public final class UCharacter
     public static ValueIterator getName1_0Iterator()
     {
         return new UCharacterNameIterator(NAME_,
-                                 UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
+                                 UCharacterNameChoice.UNICODE_10_CHAR_NAME);
     }
     
     /**
@@ -1512,7 +1530,7 @@ public final class UCharacter
     public static ValueIterator getExtendedNameIterator()
     {
         return new UCharacterNameIterator(NAME_,
-                                 UCharacterNameChoice.U_EXTENDED_CHAR_NAME);
+                                 UCharacterNameChoice.EXTENDED_CHAR_NAME);
     }
     
     /**
@@ -1616,7 +1634,7 @@ public final class UCharacter
 	{
 		return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
 	}
-
+    
     // protected data members --------------------------------------------
     
     /**
@@ -1629,30 +1647,13 @@ public final class UCharacter
     {
         try
         {
-            NAME_ = new UCharacterName();
+            NAME_ = UCharacterName.getInstance();
         }
         catch (Exception e)
         {
             throw new RuntimeException(e.getMessage());
         }
     }
-    
-    // protected methods -------------------------------------------------
-      
-    /**
-    * Determines if codepoint is a non character
-    * @param ch codepoint
-    * @return true if codepoint is a non character false otherwise
-    */
-    static boolean isNonCharacter(int ch) 
-    {
-        if ((ch & NON_CHARACTER_SUFFIX_MIN_3_0_) == 
-                                            NON_CHARACTER_SUFFIX_MIN_3_0_) {
-            return true;
-        }
-        
-        return ch >= NON_CHARACTER_MIN_3_1_ && ch <=  NON_CHARACTER_MAX_3_1_;
-    }
         
     // private variables -------------------------------------------------
     
@@ -1692,24 +1693,8 @@ public final class UCharacter
     /**
     * Shift 24 bits
     */
-    private static final int SHIFT_24_ = 24;
-      
-    /**
-    * Minimum suffix value that indicates if a character is non character.
-    * Unicode 3.0 non characters
-    */
-    private static final int NON_CHARACTER_SUFFIX_MIN_3_0_ = 0xFFFE;
+    private static final int SHIFT_24_ = 24;  
     
-    /**
-    * New minimum non character in Unicode 3.1
-    */
-    private static final int NON_CHARACTER_MIN_3_1_ = 0xFDD0;
-    
-    /**
-    * New non character range in Unicode 3.1
-    */
-    private static final int NON_CHARACTER_MAX_3_1_ = 0xFDEF;
-      
     /**
     * Decimal radix
     */
diff --git a/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java b/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java
index 9c5ff60bad7..4d86d4e232d 100755
--- a/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java
+++ b/icu4j/src/com/ibm/icu/lang/UCharacterCategory.java
@@ -6,8 +6,8 @@
 *
 * $Source: 
 *      /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategory.java $ 
-* $Date: 2002/09/11 00:12:39 $ 
-* $Revision: 1.8 $
+* $Date: 2002/09/19 21:18:14 $ 
+* $Revision: 1.9 $
 *
 *******************************************************************************
 */
@@ -250,63 +250,4 @@ public final class UCharacterCategory
     private UCharacterCategory()
     {
     }
-    
-	// package private data members --------------------------------------
-	
-	/**
-	* Not a character type
-	*/
-	static final int NON_CHARACTER_ = CHAR_CATEGORY_COUNT;
-	/**
-	* Lead surrogate type
-	*/
-	static final int LEAD_SURROGATE_ = CHAR_CATEGORY_COUNT + 1;
-    /**
-	* Trail surrogate type
-	*/
-	static final int TRAIL_SURROGATE_ = CHAR_CATEGORY_COUNT + 2;
-	/**
-	* Extended category count
-	*/
-	static final int EXTENDED_CATEGORY_ = CHAR_CATEGORY_COUNT + 3;
-	/**
-    * Type names used for extended names
-    */
-    static final String TYPE_NAMES_[] = {"unassigned",
-                                                 "uppercase letter",
-                                                 "lowercase letter",
-                                                 "titlecase letter",
-                                                 "modifier letter",
-                                                 "other letter",
-                                                 "non spacing mark",
-                                                 "enclosing mark",
-                                                 "combining spacing mark",
-                                                 "decimal digit number",
-                                                 "letter number",
-                                                 "other number",
-                                                 "space separator",
-                                                 "line separator",
-                                                 "paragraph separator",
-                                                 "control",
-                                                 "format",
-                                                 "private use area",
-                                                 "surrogate",
-                                                 "dash punctuation",   
-                                                 "start punctuation",
-                                                 "end punctuation",
-                                                 "connector punctuation",
-                                                 "other punctuation",
-                                                 "math symbol",
-                                                 "currency symbol",
-                                                 "modifier symbol",
-                                                 "other symbol",
-                                                 "initial punctuation",
-                                                 "final punctuation",
-                                                 "noncharacter",
-                                                 "lead surrogate",
-                                                 "trail surrogate"};
-   /**
-   * Unknown type name
-   */
-   static final String UNKNOWN_TYPE_NAME_ = "unknown";
 }
diff --git a/icu4j/src/com/ibm/icu/lang/UCharacterName.java b/icu4j/src/com/ibm/icu/lang/UCharacterName.java
deleted file mode 100755
index 208c624a433..00000000000
--- a/icu4j/src/com/ibm/icu/lang/UCharacterName.java
+++ /dev/null
@@ -1,1181 +0,0 @@
-/**
-*******************************************************************************
-* Copyright (C) 1996-2001, International Business Machines Corporation and    *
-* others. All Rights Reserved.                                                *
-*******************************************************************************
-*
-* $Source: 
-*     /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $ 
-* $Date: 2002/07/30 02:38:11 $ 
-* $Revision: 1.17 $
-*
-*******************************************************************************
-*/
-package com.ibm.icu.lang;
-
-import java.io.InputStream;
-import java.io.BufferedInputStream;
-import java.io.IOException;
-import com.ibm.icu.impl.Utility;
-import com.ibm.icu.text.UTF16;
-
-/**
-* Internal class to manage character names.
-* Since data in <a href=UCharacterNameDB.html>UCharacterNameDB</a> is stored
-* in an array of char, by default indexes used in this class is refering to 
-* a 2 byte count, unless otherwise stated. Cases where the index is refering 
-* to a byte count, the index is halved and depending on whether the index is 
-* even or odd, the MSB or LSB of the result char at the halved index is 
-* returned. For indexes to an array of int, the index is multiplied by 2, 
-* result char at the multiplied index and its following char is returned as an 
-* int.
-* <a href=UCharacter.html>UCharacter</a> acts as a public facade for this class
-* Note : 0 - 0x1F are control characters without names in Unicode 3.0
-* Information on parsing of the binary data is located at
-* <a href=oss.software.ibm.com/icu4j/icu4jhtml/com/ibm/icu/text/readme.html>
-* ReadMe</a>
-* @author Syn Wee Quek
-* @since nov0700
-*/
-
-final class UCharacterName
-{
-    // public methods ----------------------------------------------------
-    
-    /**
-    * toString method for printing
-    */
-    public String toString()
-    {
-        StringBuffer result = new StringBuffer("names content \n");
-        /*result.append(super.toString());
-        result.append('\n');
-        result.append("token string offset ");
-        result.append(m_tokenstringoffset_);
-        result.append("\n");
-        result.append("group offset ");
-        result.append(m_groupsoffset_);
-        result.append("\n");
-        result.append("group string offset ");
-        result.append(m_groupstringoffset_);
-        result.append("\n");
-        result.append("alg names offset ");
-        result.append(m_algnamesoffset_);
-        result.append("\n");
-        */
-        return result.toString();
-    } 
-    
-    // package protected inner class -------------------------------------
-    
-    /**
-    * Algorithmic name class
-    */
-    static final class AlgorithmName
-    {
-        // protected data members ----------------------------------------
-        
-        /**
-        * Constant type value of the different AlgorithmName
-        */
-        protected static final int TYPE_0_ = 0;
-        protected static final int TYPE_1_ = 1;
-        
-        // protected constructors ----------------------------------------
-        
-        /**
-        * Constructor
-        */
-        protected AlgorithmName()
-        {
-        }
-        
-        // protected methods ---------------------------------------------
-        
-        /**
-        * Sets the information for accessing the algorithmic names
-        * @param rangestart starting code point that lies within this name group
-        * @param rangeend end code point that lies within this name group
-        * @param type algorithm type. There's 2 kinds of algorithmic type. First 
-        *        which uses code point as part of its name and the other uses 
-        *        variant postfix strings
-        * @param variant algorithmic variant
-        * @return true if values are valid
-        */ 
-        protected boolean setInfo(int rangestart, int rangeend, byte type,
-                                byte variant)
-        {
-            if (rangestart >= UCharacter.MIN_VALUE && rangestart <= rangeend 
-                && rangeend <= UCharacter.MAX_VALUE && 
-                (type == TYPE_0_ || type == TYPE_1_)) {
-                m_rangestart_ = rangestart;
-                m_rangeend_ = rangeend;
-                m_type_ = type;
-                m_variant_ = variant;
-                return true;
-            }
-            return false;
-        }
-        
-        /**
-        * Sets the factor data
-        * @param array of factor
-        * @return true if factors are valid
-        */
-        protected boolean setFactor(char factor[])
-        {
-            if (factor.length == m_variant_) {
-                m_factor_ = factor;
-                return true;
-            }
-            return false;
-        }
-        
-        /**
-        * Sets the name prefix
-        * @param prefix
-        * @return true if prefix is set
-        */
-        protected boolean setPrefix(String prefix)
-        {
-            if (prefix != null && prefix.length() > 0) {
-                m_prefix_ = prefix;
-                return true;
-            }
-            return false;
-        }
-        
-        /**
-        * Sets the variant factorized name data 
-        * @param string variant factorized name data
-        * @return true if values are set
-        */
-        protected boolean setFactorString(byte string[])
-        {
-            // factor and variant string can be empty for things like 
-            // hanggul code points
-            m_factorstring_ = string;
-            return true;
-        }
-      
-        /**
-        * Checks if code point lies in Algorithm object at index
-        * @param ch code point 
-        */
-        protected boolean contains(int ch)
-        {
-            return m_rangestart_ <= ch && ch <= m_rangeend_;
-        }
-        
-        /**
-        * Appends algorithm name of code point into StringBuffer.
-        * Note this method does not check for validity of code point in Algorithm,
-        * result is undefined if code point does not belong in Algorithm.
-        * @param ch code point
-        * @param str StringBuffer to append to
-        */
-        protected void appendName(int ch, StringBuffer str)
-        {
-            str.append(m_prefix_);
-            switch (m_type_) 
-            {
-                case TYPE_0_: 
-                    // prefix followed by hex digits indicating variants
-                    Utility.hex(ch, m_variant_, str);
-                    break;
-                case TYPE_1_: 
-                    // prefix followed by factorized-elements
-                    int offset = ch - m_rangestart_;
-                    int indexes[] = new int[m_variant_];
-                    int factor;
-                      
-                    // write elements according to the factors
-                    // the factorized elements are determined by modulo 
-                    // arithmetic
-                    for (int i = m_variant_ - 1; i > 0; i --) 
-                    {
-                        factor = m_factor_[i] & 0x00FF;
-                        indexes[i] = offset % factor;
-                        offset /= factor;
-                    }
-                      
-                    // we don't need to calculate the last modulus because 
-                    // start <= code <= end guarantees here that 
-                    // code <= factors[0]
-                    indexes[0] = offset;
-
-                    // joining up the factorized strings 
-                    String s[] = getFactorString(indexes);
-                    if (s != null && s.length > 0)
-                    {
-                        int size = s.length;
-                        for (int i = 0; i < size; i ++)
-                        str.append(s[i]);
-                    }
-                    break;
-            }
-        }
-        
-        /**
-        * Gets the character for the argument algorithmic name
-        * @return the algorithmic char or -1 otherwise.
-        */
-        protected int getAlgorithmChar(String name)
-        {
-            int prefixlen = m_prefix_.length();
-            if (name.length() < prefixlen || 
-                !m_prefix_.equals(name.substring(0, prefixlen))) {
-                return -1;
-            }
-                
-            switch (m_type_) 
-            {
-                case TYPE_0_ : 
-                try
-                {
-                    int result = Integer.parseInt(name.substring(prefixlen), 
-                                                  16);
-                    // does it fit into the range?
-                    if (m_rangestart_ <= result && result <= m_rangeend_) {
-                        return result;
-                    }
-                }
-                catch (NumberFormatException e)
-                {
-                    return -1;
-                }
-                break;
-                case TYPE_1_ : 
-                    // repetitative suffix name comparison done here
-                    // offset is the character code - start
-                    for (int ch = m_rangestart_; ch <= m_rangeend_; ch ++)
-                    {
-                        int offset = ch - m_rangestart_;
-                        int indexes[] = new int[m_variant_];
-                        int factor;
-                  
-                        // write elements according to the factors
-                        // the factorized elements are determined by modulo 
-                        // arithmetic
-                        for (int i = m_variant_ - 1; i > 0; i --) 
-                        {
-                            factor = m_factor_[i] & 0x00FF;
-                            indexes[i] = offset % factor;
-                            offset /= factor;
-                        }
-                        
-                        // we don't need to calculate the last modulus 
-                        // because start <= code <= end guarantees here that 
-                        // code <= factors[0]
-                        indexes[0] = offset;
-
-                        // joining up the factorized strings 
-                        if (compareFactorString(indexes, name, prefixlen)) {
-                            return ch;
-                        }
-                    }
-            }
-
-            return -1;
-        }
-        
-        // private data members ------------------------------------------
-        
-        /**
-        * Algorithmic data information
-        */
-        private int m_rangestart_;
-        private int m_rangeend_;
-        private byte m_type_;
-        private byte m_variant_;
-        private char m_factor_[];
-        private String m_prefix_;
-        private byte m_factorstring_[];
-        
-        // private methods -----------------------------------------------
-                
-        /**
-        * Gets the indexth string in each of the argument factor block
-        * @param index array with each index corresponding to each factor block
-        * @return array of indexth factor string in factor block
-        */
-        private String[] getFactorString(int index[])
-        {
-            int size = m_factor_.length;
-            if (index == null || index.length != size) {
-                return null;
-            }
-                
-            String result[] = new String[size];
-            StringBuffer str = new StringBuffer();
-            int count = 0;
-            int factor;
-            size --;
-            for (int i = 0; i <= size; i ++) {
-                factor = m_factor_[i];
-                count = UCharacterUtil.skipNullTermByteSubString(
-                                          m_factorstring_, count, index[i]);
-                count = UCharacterUtil.getNullTermByteSubString(
-                                          str, m_factorstring_, count);
-                if (i != size) {
-                    count = UCharacterUtil.skipNullTermByteSubString(
-                                                   m_factorstring_, count, 
-                                                   factor - index[i] - 1);
-                }
-                result[i] = str.toString();
-                str.delete(0, str.length());
-            }
-            return result;
-        }
-        
-        /**
-        * Compares the indexth string in each of the argument factor block with
-        * the argument string
-        * @param index array with each index corresponding to each factor block
-        * @param str string to compare with
-        * @param offset of str to start comparison
-        * @return true if string matches
-        */
-        private boolean compareFactorString(int index[], String str, 
-                                            int offset)
-        {
-            int size = m_factor_.length;
-            if (index == null || index.length != size)
-                return false;
-                
-            int count = 0;
-            int strcount = offset;
-            int factor;
-            size --;
-            for (int i = 0; i <= size; i ++)
-            {
-                factor = m_factor_[i];
-                count = UCharacterUtil.skipNullTermByteSubString(
-                                          m_factorstring_, count, index[i]);
-                strcount = UCharacterUtil.compareNullTermByteSubString(str, 
-                                          m_factorstring_, strcount, count);
-                if (strcount < 0) {
-                    return false;
-                }
-                  
-                if (i != size) {
-                    count = UCharacterUtil.skipNullTermByteSubString(
-                                  m_factorstring_, count, factor - index[i]);
-                }
-            }
-            if (strcount != str.length()) {
-                return false;
-            }
-            return true;
-        }
-    }
-    
-    // protected data members --------------------------------------------
-    
-    /**
-     * Number of groups, set through setGroupCountSize. Used as the upper 
-     * bound when searching or iterating the group table.
-     */
-    protected int m_groupcount_ = 0;
-    /**
-     * Size of each group entry, used as the stride when indexing the group 
-     * information array. Set through setGroupCountSize.
-     */
-    protected int m_groupsize_ = 0;
-    /**
-    * Number of lines (code points) per group.
-    * Has to be equal to 1 << GROUP_SHIFT_
-    */
-    protected static final int LINES_PER_GROUP_ = 1 << 5;
-    
-    // protected constructor ---------------------------------------------
-    
-    /**
-    * <p>Protected constructor for use in UCharacter.</p>
-    * <p>Opens the name data file as a class resource and lets a 
-    * UCharacterNameReader fill this object with it. The stream is closed 
-    * again whether or not the reading succeeds.</p>
-    * @exception IOException thrown when the data file can not be found or 
-    *            when data reading fails
-    */
-    protected UCharacterName() throws IOException
-    {
-        InputStream i = getClass().getResourceAsStream(NAME_FILE_NAME_);
-        if (i == null) {
-            // getResourceAsStream reports a missing resource with null, 
-            // fail here with a clear message instead of somewhere later
-            throw new IOException("Unable to open name data file " 
-                                  + NAME_FILE_NAME_);
-        }
-        BufferedInputStream b = new BufferedInputStream(i, 
-                                                        NAME_BUFFER_SIZE_);
-        try {
-            UCharacterNameReader reader = new UCharacterNameReader(b);
-            reader.read(this);
-        }
-        finally {
-            // closing the buffered stream closes the underlying stream too
-            b.close();
-        }
-    }
-      
-    // protected methods -------------------------------------------------
-     
-    /**
-    * Retrieve the name of a Unicode code point.
-    * Depending on <code>choice</code>, the name returned is the "modern" 
-    * name, the name that was defined in Unicode version 1.0 or the 
-    * extended name.
-    * An algorithmic name is tried first; only if that gives nothing the 
-    * extended name or the group name is looked up.
-    *
-    * @param ch the code point for which to get the name.
-    * @param choice Selector for which name to get.
-    * @return the name; null if ch is outside UCharacter.MIN_VALUE to 
-    *         UCharacter.MAX_VALUE, if choice is not below 
-    *         U_CHAR_NAME_CHOICE_COUNT, or if the lookup gives no name
-    */
-    protected String getName(int ch, int choice)
-    {
-        boolean inRange = ch >= UCharacter.MIN_VALUE 
-                          && ch <= UCharacter.MAX_VALUE;
-        if (!inRange 
-            || choice >= UCharacterNameChoice.U_CHAR_NAME_CHOICE_COUNT) {
-            return null;
-        }
-
-        String algorithmic = getAlgName(ch, choice);
-        if (algorithmic != null && algorithmic.length() != 0) {
-            return algorithmic;
-        }
-
-        // no algorithmic name, get the normal character name
-        if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
-            return getExtendedName(ch);
-        }
-        return getGroupName(ch, choice);
-    }
-      
-    /**
-    * Find a character by its name and return its code point value.
-    * The lookup order is: extended names of the form &lt;type-hex&gt;, 
-    * algorithmic names (not for Unicode 1.0 names), group names.
-    * Case is folded with a fixed locale so that the result does not depend 
-    * on the default locale of the virtual machine.
-    * @param choice selector to indicate if argument name is a Unicode 1.0 
-    *        name, the most current name or an extended name
-    * @param name character name
-    * @return code point, -1 if the arguments are illegal or no character 
-    *         with that name is found
-    */
-    protected int getCharFromName(int choice, String name)
-    {
-        // checks for illegal arguments
-        if (choice >= UCharacterNameChoice.U_CHAR_NAME_CHOICE_COUNT || 
-            name == null || name.length() == 0) {
-            return -1;
-        }
-        
-        // try extended names first, -2 means "not an extended name, go on"
-        int result = getExtendedChar(
-                         name.toLowerCase(java.util.Locale.ENGLISH), choice);
-        if (result >= -1) {
-            return result;
-        }
-        
-        String upperCaseName = name.toUpperCase(java.util.Locale.ENGLISH);
-        
-        // then algorithmic names, there are none for Unicode 1.0
-        if (choice != UCharacterNameChoice.U_UNICODE_10_CHAR_NAME 
-            && m_algorithm_ != null) {
-            for (int count = m_algorithm_.length - 1; count >= 0; count --) {
-                result = m_algorithm_[count].getAlgorithmChar(upperCaseName); 
-                if (result >= 0) {
-                    return result;
-                }
-            }
-        }
-            
-        // finally the group names
-        if (choice == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
-            result = getGroupChar(upperCaseName, 
-                                  UCharacterNameChoice.U_UNICODE_CHAR_NAME);
-            if (result == -1) {
-                result = getGroupChar(upperCaseName, 
-                                UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
-            }
-        }
-        else {
-            result = getGroupChar(upperCaseName, choice);
-        }
-        return result;
-    }
-    
-    /**
-    * Sets the token data. Nothing is stored if an argument is rejected.
-    * @param token array of tokens
-    * @param tokenstring array of string values of the tokens
-    * @return false if an array is null or empty, true once both are stored
-    */
-    protected boolean setToken(char token[], byte tokenstring[])
-    {
-        if (token == null || tokenstring == null || token.length == 0 ||
-            tokenstring.length == 0) {
-            return false;
-        }
-        m_tokentable_ = token;
-        m_tokenstring_ = tokenstring;
-        return true;
-    }
-        
-    /**
-    * Set the algorithm name information array. Nothing is stored if the 
-    * argument is rejected.
-    * @param alg algorithm information array
-    * @return true if the array has been stored, false if it is null or empty
-    */
-    protected boolean setAlgorithm(AlgorithmName alg[])
-    {
-        boolean usable = alg != null && alg.length != 0;
-        if (usable) {
-            m_algorithm_ = alg;
-        }
-        return usable;
-    }
-    
-    /**
-    * Sets the number of groups and the size of each group in number of char.
-    * Nothing is stored unless both values are positive.
-    * @param count number of groups
-    * @param size size of group in char
-    * @return true if both values have been stored, false if one of them is 
-    *         zero or negative
-    */
-    protected boolean setGroupCountSize(int count, int size)
-    {
-        if (count > 0 && size > 0) {
-            m_groupcount_ = count;
-            m_groupsize_ = size;
-            return true;
-        }
-        return false;
-    }
-      
-    /**
-    * Sets the group name data. Nothing is stored if an argument is rejected.
-    * @param group index information array
-    * @param groupstring name information array
-    * @return false if an array is null or empty, true once both are stored
-    */
-    protected boolean setGroup(char group[], byte groupstring[])
-    {
-        if (group == null || groupstring == null || group.length == 0 ||
-            groupstring.length == 0) {
-            return false;
-        }
-        m_groupinfo_ = group;
-        m_groupstring_ = groupstring;
-        return true;
-    }
-    
-    /**
-    * Reads a block of compressed lengths of 32 strings and expands them into 
-    * offsets and lengths for each string. Lengths are stored with a 
-    * variable-width encoding in consecutive nibbles, the high nibble of a 
-    * byte is read before the low nibble:
-    * If a nibble<0xc, then it is the length itself (0 = empty string).
-    * If a nibble>=0xc, then it forms a length value with the following 
-    * nibble: ((nibble - 12) << 4 | following nibble) + 12.
-    * The offsets and lengths arrays must be at least 33 (one more) long 
-    * because the low nibble of the last byte read is decoded as well, even 
-    * if the 32 lengths are already complete; it ends up in lengths[32].
-    * offsets[0] is always 0, every further offset is the sum of the 
-    * lengths before it.
-    * @param index of group string object in array
-    * @param offsets array to store the value of the string offsets
-    * @param lengths array to store the value of the string length
-    * @return next index of the data string immediately after the lengths 
-    *         in terms of byte address
-    */
-    protected int getGroupLengths(int index, char offsets[], char lengths[]) 
-    {
-        // 0xffff means that no first nibble of a two nibble length is pending
-        char length = 0xffff;
-        byte b = 0,
-            n = 0;
-        int shift;
-        // position of the entry of this group in the group information array
-        index = index * m_groupsize_; // byte count offsets of group strings
-        // the entry holds the position of the group strings in two chars
-        int stringoffset = UCharacterUtil.toInt(
-                                 m_groupinfo_[index + OFFSET_HIGH_OFFSET_], 
-                                 m_groupinfo_[index + OFFSET_LOW_OFFSET_]);
-            
-        offsets[0] = 0;
-        
-        // all 32 lengths must be read to get the offset of the first group 
-        // string
-        for (int i = 0; i < LINES_PER_GROUP_; stringoffset ++) {
-            b = m_groupstring_[stringoffset];
-            // high nibble first, then the low nibble
-            shift = 4;
-              
-            while (shift >= 0) {
-                // getting nibble
-                n = (byte)((b >> shift) & 0x0F);   
-                if (length == 0xffff && n > SINGLE_NIBBLE_MAX_) {
-                	// first half of a two nibble length, keep it until the 
-                	// next nibble arrives, possibly from the next byte
-                	length = (char)((n - 12) << 4);
-                }
-                else {
-                	if (length != 0xffff) {
-                 	   // second half of a two nibble length
-                 	   lengths[i] = (char)((length | n) + 12);
-                	}
-                	else {
-                 	   lengths[i] = (char)n;
-                	}
-                    
-                	// i can be 32 here when the low nibble of the last byte 
-                	// is left over, there is no offset to store for it
-                	if (i < LINES_PER_GROUP_) {
-                 	   offsets[i + 1] = (char)(offsets[i] + lengths[i]);
-                	}
-                    
-                	length = 0xffff;
-                	i ++;
-                }
-                      
-                shift -= 4;
-            }
-        }
-        return stringoffset;
-    }
-    
-    /**
-    * Gets the name stored at the argument position of the group strings.
-    * Every byte of the stored name is either a letter or a token that is 
-    * expanded through the token table; a ';' separates the modern name 
-    * from the Unicode 1.0 name.
-    * Bytes are handled as unsigned values, both when they are compared with 
-    * the size of the token table and when they are used as an index or 
-    * appended as a letter.
-    * @param index of the group name string in byte count
-    * @param length of the group name string
-    * @param choice of Unicode 1.0 name or the most current name
-    * @return name of the group, null if the name is empty
-    */
-    protected String getGroupName(int index, int length, int choice) 
-    {
-        if (choice == UCharacterNameChoice.U_UNICODE_10_CHAR_NAME) {
-            // step over the modern name up to the separator
-            int oldindex = index;
-            index += UCharacterUtil.skipByteSubString(m_groupstring_, 
-                                               index, length, (byte)';');   
-            length -= (index - oldindex);
-        }
-        
-        StringBuffer s = new StringBuffer();
-        int b; // current byte as an unsigned value
-        char token;
-        for (int i = 0; i < length;) {
-            b = m_groupstring_[index + i] & 0x00ff;
-            i ++;
-              
-            if (b >= m_tokentable_.length) {
-                if (b == ';') {
-                    break;
-                }
-                // implicit letter, appended as a character and not as the 
-                // decimal digits of the byte value
-                s.append((char)b);
-            }
-            else {
-                token = m_tokentable_[b];
-                if (token == 0xFFFE) {
-                    // this is a lead byte for a double-byte token
-                    token = m_tokentable_[b << 8 | 
-                                      (m_groupstring_[index + i] & 0x00ff)];
-                    i ++;
-                }
-                if (token == 0xFFFF) {
-                    if (b == ';') {
-                        // skip the semicolon if we are seeking extended 
-                        // names and there was no 2.0 name but there
-                        // is a 1.0 name.
-                        if (s.length() == 0 && choice == 
-                               UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
-                            continue;
-                        }
-                        break;
-                    }
-                    s.append((char)b); // explicit letter
-                }
-                else { // write token word
-                    UCharacterUtil.getNullTermByteSubString(s, 
-                                                     m_tokenstring_, token);
-                }
-            }
-        }
-
-        if (s.length() == 0) {
-            return null;
-        }
-        return s.toString();
-    }
-    
-    /**
-    * Retrieves the extended name. The most current name is used if there 
-    * is one. Otherwise control characters are tried with their Unicode 1.0 
-    * name, and if there is still no name getExtendedOr10Name decides.
-    * @param ch code point
-    * @return extended name of the code point
-    */
-    protected String getExtendedName(int ch) 
-    {
-        String name = getName(ch, UCharacterNameChoice.U_UNICODE_CHAR_NAME);
-        if (name != null) {
-            return name;
-        }
-        if (getType(ch) == UCharacterCategory.CONTROL) {
-            name = getName(ch, UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
-        }
-        return (name != null) ? name : getExtendedOr10Name(ch);
-    }
-    
-    /**
-     * Gets the group index for the codepoint, or the group before it.
-     * Binary search over the group table that compares the MSB of the 
-     * codepoint with the MSB of the groups.
-     * @param codepoint
-     * @return group index containing codepoint or the group before it.
-     */
-    protected int getGroup(int codepoint)
-    {
-        final int target = getCodepointMSB(codepoint);
-        int low  = 0;             // candidate that is returned in the end
-        int high = m_groupcount_; // exclusive upper bound
-        while (low < high - 1) {
-            int middle = (low + high) >> 1;
-            if (target < getGroupMSB(middle)) {
-                high = middle;
-            }
-            else {
-                low = middle;
-            }
-        }
-        return low;
-    }
-    
-    /**
-     * Gets the extended or 1.0 name when the most current unicode names
-     * fail. Control characters get their Unicode 1.0 name if there is one. 
-     * Everything else gets a name of the form &lt;type-XXXX&gt;, with the 
-     * name of the extended type and the code point in upper case hex 
-     * digits, padded with zeros to at least 4 digits.
-     * @param ch codepoint
-     * @return name of codepoint extended or 1.0
-     */
-    protected String getExtendedOr10Name(int ch)
-    {
-        final int type = getType(ch);
-        if (type == UCharacterCategory.CONTROL) {
-            String oldName = getName(ch, 
-                               UCharacterNameChoice.U_UNICODE_10_CHAR_NAME);
-            if (oldName != null) {
-                return oldName;
-            }
-        }
-
-        // Use the unknown type name if the table of names is not up to 
-        // date.
-        String typeName = (type >= UCharacterCategory.TYPE_NAMES_.length) 
-                          ? UCharacterCategory.UNKNOWN_TYPE_NAME_
-                          : UCharacterCategory.TYPE_NAMES_[type];
-
-        StringBuffer buffer = new StringBuffer(typeName);
-        buffer.insert(0, '<');
-        buffer.append('-');
-        String hex = Integer.toHexString(ch).toUpperCase();
-        for (int digits = hex.length(); digits < 4; digits ++) {
-            buffer.append('0');
-        }
-        buffer.append(hex);
-        buffer.append('>');
-        return buffer.toString();
-    }
-    
-    // these are all UCharacterNameIterator use methods -------------------
-    
-    /**
-     * Gets the MSB from the group index
-     * @param gindex group index
-     * @return the MSB stored for the group, -1 if gindex is not below the 
-     *         number of groups
-     */
-    protected int getGroupMSB(int gindex)
-    {
-        return (gindex >= m_groupcount_) 
-               ? -1 
-               : m_groupinfo_[gindex * m_groupsize_];
-    }
-    
-    /**
-     * Gets the MSB of the codepoint, that is the codepoint without its 
-     * lowest GROUP_SHIFT_ bits
-     * @param codepoint 
-     * @return the MSB of the codepoint
-     */
-    protected int getCodepointMSB(int codepoint)
-    {
-        final int msb = codepoint >> GROUP_SHIFT_;
-        return msb;
-    }
-    
-    /**
-     * Gets the maximum codepoint + 1 of the group
-     * @param msb most significant bits of the group
-     * @return limit codepoint of the group
-     */
-    protected int getGroupLimit(int msb)
-    {
-        final int first = msb << GROUP_SHIFT_;
-        return first + LINES_PER_GROUP_;
-    }
-    
-    /**
-     * Gets the minimum codepoint of the group
-     * @param msb most significant bits of the group
-     * @return minimum codepoint of the group
-     */
-    protected int getGroupMin(int msb)
-    {
-        final int first = msb << GROUP_SHIFT_;
-        return first;
-    }
-    
-    /**
-     * Gets the offset of the codepoint within its group
-     * @param codepoint 
-     * @return the bits of codepoint selected by GROUP_MASK_
-     */
-    protected int getGroupOffset(int codepoint)
-    {
-        final int line = codepoint & GROUP_MASK_;
-        return line;
-    }
-
-    /**
-     * Gets the minimum codepoint of a group
-     * @param codepoint
-     * @return codepoint with the bits of GROUP_MASK_ cleared, the minimum 
-     *         codepoint in the group which codepoint belongs to
-     */
-    protected int getGroupMinFromCodepoint(int codepoint)
-    {
-        final int withoutOffset = ~GROUP_MASK_;
-        return codepoint & withoutOffset;
-    }
-    
-    /**
-     * Gets the number of algorithmic name ranges
-     * @return length of the algorithm information array
-     */
-    protected int getAlgorithmLength()
-    {
-        final AlgorithmName ranges[] = m_algorithm_;
-        return ranges.length;
-    }
-        
-    /**
-     * Gets the start of an algorithmic name range
-     * @param index algorithm index
-     * @return algorithm range start
-     */
-    protected int getAlgorithmStart(int index)
-    {
-        final AlgorithmName range = m_algorithm_[index];
-        return range.m_rangestart_;
-    }
-        
-    /**
-     * Gets the end of an algorithmic name range
-     * @param index algorithm index
-     * @return algorithm range end
-     */
-    protected int getAlgorithmEnd(int index)
-    {
-        final AlgorithmName range = m_algorithm_[index];
-        return range.m_rangeend_;
-    }
-    
-    /**
-     * Gets the Algorithmic name of the codepoint
-     * @param index algorithmic range index
-     * @param codepoint 
-     * @return what the range at index appends as name for codepoint
-     */
-    protected String getAlgorithmName(int index, int codepoint) 
-    {
-        final AlgorithmName range = m_algorithm_[index];
-        StringBuffer name = new StringBuffer();
-        range.appendName(codepoint, name);
-        return name.toString();
-    }
-    
-        
-    // private data members ----------------------------------------------
-    
-    /**
-    * Data read from unames.icu: the token table with the strings of the 
-    * tokens (see setToken), the group table with the strings of the groups 
-    * (see setGroup) and the algorithmic name ranges (see setAlgorithm).
-    * m_algorithm_ stays null if setAlgorithm was never called successfully.
-    */
-    private char m_tokentable_[];
-    private byte m_tokenstring_[];
-    private char m_groupinfo_[];
-    private byte m_groupstring_[];
-    private AlgorithmName m_algorithm_[];
-      
-    /**
-    * Group use: buffers that getGroupLengths fills with the offsets and 
-    * lengths of the strings of one group. One entry more than there are 
-    * lines in a group, as getGroupLengths requires. The methods of this 
-    * class that use these buffers are synchronized.
-    */
-    private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1];
-    private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1];
-      	 
-    /**
-    * Default name of the name datafile, a class resource path
-    */
-    private static final String NAME_FILE_NAME_ = 
-                                           "/com/ibm/icu/impl/data/unames.icu";
-    /**
-    * Shift count to retrieve group information, a code point shifted right 
-    * by this count gives the MSB that identifies its group
-    */
-    private static final int GROUP_SHIFT_ = 5;
-    /**
-    * Mask to retrieve the offset for a particular character within a group
-    */
-    private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1;
-    /**
-    * Default buffer size of datafile, used for the buffered input stream 
-    * the data is read from
-    */
-    private static final int NAME_BUFFER_SIZE_ = 100000;
-      
-    /**
-    * Position of offsethigh in a group entry of the group information array
-    */
-    private static final int OFFSET_HIGH_OFFSET_ = 1;
-      
-    /**
-    * Position of offsetlow in a group entry of the group information array
-    */
-    private static final int OFFSET_LOW_OFFSET_ = 2;
-    /**
-    * Double nibble indicator, any nibble > this number has to be combined
-    * with its following nibble
-    */
-    private static final int SINGLE_NIBBLE_MAX_ = 11;
-     
-      
-    // private methods ---------------------------------------------------
-      
-    /**
-    * Gets the algorithmic name for the argument character.
-    * The ranges are searched from the last one to the first one, the first 
-    * range that contains the character gives the name.
-    * @param ch character to determine name for
-    * @param choice name choice
-    * @return the algorithmic name or null if not found, also null for 
-    *         Unicode 1.0 names and if no algorithmic ranges are set
-    */
-    private String getAlgName(int ch, int choice) 
-    {
-        // Do not write algorithmic Unicode 1.0 names because Unihan names are 
-        // the same as the modern ones, extension A was only introduced with 
-        // Unicode 3.0, and the Hangul syllable block was moved and changed 
-        // around Unicode 1.1.5.
-        if (choice == UCharacterNameChoice.U_UNICODE_10_CHAR_NAME) {
-            return null;
-        }
-        // setAlgorithm may never have stored an array, same check as in 
-        // getCharFromName
-        if (m_algorithm_ == null) {
-            return null;
-        }
-        
-        for (int index = m_algorithm_.length - 1; index >= 0; index --) {
-            if (m_algorithm_[index].contains(ch)) {
-                // the buffer is only needed once a range is found
-                StringBuffer s = new StringBuffer();
-                m_algorithm_[index].appendName(ch, s);
-                return s.toString();
-            }
-        }
-        return null;
-    }
-      
-    /**
-    * Getting the character with the tokenized argument name.
-    * Goes through all groups in order, expands the string lengths of each 
-    * group into the shared buffers and searches the group for the name.
-    * @param name of the character
-    * @param choice of either 1.0 or the most current unicode name
-    * @return character with the tokenized argument name or -1 if character
-    *         is not found
-    */
-    private synchronized int getGroupChar(String name, int choice) 
-    {
-        int group = 0;
-        while (group < m_groupcount_) {
-            // expand the lengths, the strings of the group follow them
-            int stringStart = getGroupLengths(group, m_groupoffsets_, 
-                                              m_grouplengths_);
-            int line = getGroupChar(stringStart, m_grouplengths_, name, 
-                                    choice);
-            if (line != -1) {
-                // MSB of the group in the upper bits, line in the lower bits
-                int msb = m_groupinfo_[group * m_groupsize_];
-                return (msb << GROUP_SHIFT_) | line;
-            }
-            group ++;
-        }
-        return -1;
-    }
-      
-    /**
-    * Compares and retrieve character if name is found within the argument 
-    * group.
-    * Only the LINES_PER_GROUP_ lines of the group are searched. The length 
-    * array has one entry more, but that entry does not belong to a line of 
-    * the group. Bytes of the group strings are handled as unsigned values.
-    * @param index index where the set of names reside in the group block
-    * @param length list of lengths of the strings
-    * @param name character name to search for
-    * @param choice of either 1.0 or the most current unicode name
-    * @return relative character in the group which matches name, otherwise if   
-    *         not found, -1 will be returned
-    */
-    private int getGroupChar(int index, char length[], String name, 
-                             int choice)
-    { 
-        int b; // current byte as an unsigned value
-        char token;
-        int len;
-        int namelen = name.length();
-        int nindex;
-        int count;
-        
-        for (int result = 0; result < LINES_PER_GROUP_; result ++) {
-            nindex = 0;
-            len = length[result];
-              
-            if (choice == UCharacterNameChoice.U_UNICODE_10_CHAR_NAME) {
-                // step over the modern name up to the separator
-                int oldindex = index;
-                index += UCharacterUtil.skipByteSubString(m_groupstring_, 
-                                                     index, len, (byte)';');
-                len -= (index - oldindex);
-            }
-                
-            // compare each letter directly, and a token word per token; 
-            // nindex turns -1 on the first mismatch
-            for (count = 0; count < len && nindex != -1 && nindex < namelen;
-                ) {
-                b = m_groupstring_[index + count] & 0xFF;
-                count ++;
-                   
-                if (b >= m_tokentable_.length) {
-                    // implicit letter
-                    if (name.charAt(nindex ++) != b) {
-                        nindex = -1;
-                    }
-                }
-                else {
-                    token = m_tokentable_[b];
-                    if (token == 0xFFFE) {
-                        // this is a lead byte for a double-byte token
-                        token = m_tokentable_[b << 8 | 
-                                   (m_groupstring_[index + count] & 0x00ff)];
-                        count ++;
-                    }
-                    if (token == 0xFFFF) {
-                        // explicit letter
-                        if (name.charAt(nindex ++) != b) {
-                            nindex = -1;
-                        }
-                    }
-                    else {
-                        // compare token with name
-                        nindex = UCharacterUtil.compareNullTermByteSubString(
-                                        name, m_tokenstring_, nindex, token);
-                    }
-                }
-            }
-
-            // the whole name has to be used up, and the stored name has to 
-            // end here or continue with the separator
-            if (namelen == nindex && 
-                (count == len || m_groupstring_[index + count] == ';')) {
-                return result;
-            }
-                
-            index += len;
-        }
-        return -1;
-    }
-       
-    /**
-    * Binary search for the group strings set that contains the argument Unicode 
-    * code point's most significant bits.
-    * The search ends at the last group whose MSB is not above the one of 
-    * the code point; only an exact match counts as found.
-    * @param ch the code point to look for
-    * @return position of the group strings as stored in the entry of the 
-    *         matching group, otherwise -1 is returned if group string set 
-    *         is not found
-    */
-    private int getGroupStringIndex(int ch)
-    {
-        final int msb = ch >> GROUP_SHIFT_;
-        int low  = 0;
-        int high = m_groupcount_;
-
-        // binary search for the group of names that contains the one for code
-        while (low < high - 1) {
-            int middle = (low + high) >> 1;
-            if (msb < m_groupinfo_[middle * m_groupsize_]) {
-                high = middle;
-            }
-            else {
-                low = middle;
-            }
-        }
-
-        final int entry = low * m_groupsize_;
-        if (msb != m_groupinfo_[entry]) {
-            return -1;
-        }
-        // exact match, combine the two halves of the stored position
-        return UCharacterUtil.toInt(
-                            m_groupinfo_[entry + OFFSET_HIGH_OFFSET_], 
-                            m_groupinfo_[entry + OFFSET_LOW_OFFSET_]);
-    }
-    
-    /**
-    * Gets the group name of the character
-    * @param ch character to get the group name 
-    * @param choice name choice selector to choose a unicode 1.0 or newer name
-    * @return name stored for the character, null if there is no group for 
-    *         the MSB of the character or the stored name is empty
-    */
-    private synchronized String getGroupName(int ch, int choice) 
-    {
-        final int group = getGroup(ch);
-
-        // getGroup may give the group before, only an exact match is used
-        if (getCodepointMSB(ch) != m_groupinfo_[group * m_groupsize_]) {
-            return null;
-        }
-
-        final int stringStart = getGroupLengths(group, m_groupoffsets_, 
-                                                m_grouplengths_);
-        final int line = ch & GROUP_MASK_;
-        return getGroupName(stringStart + m_groupoffsets_[line], 
-                            m_grouplengths_[line], choice);
-    }
-    
-    /**
-    * Gets the character extended type. This is the type from UCharacter, 
-    * except that non characters get a type of their own and surrogates 
-    * are split into lead and trail surrogates.
-    * @param ch character to be tested
-    * @return extended type it is associated with
-    */
-    private int getType(int ch)
-    {
-        if (UCharacter.isNonCharacter(ch)) {
-            // not a character, this has a type outside the general types
-            return UCharacterCategory.NON_CHARACTER_;
-        }
-        final int type = UCharacter.getType(ch);
-        if (type != UCharacterCategory.SURROGATE) {
-            return type;
-        }
-        return (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) 
-               ? UCharacterCategory.LEAD_SURROGATE_ 
-               : UCharacterCategory.TRAIL_SURROGATE_;
-    }
-    
-    /**
-    * Getting the character with extended name of the form <....>.
-    * The name has to look like &lt;type-hex&gt;, where type is one of the 
-    * names in UCharacterCategory.TYPE_NAMES_ and the extended type of the 
-    * code point given by the hex digits has to be that type.
-    * @param name of the character to be found, must not be empty
-    * @param choice name choice
-    * @return character associated with the name, -1 if such character is not
-    *                   found and -2 if we should continue with the search.
-    */
-    private int getExtendedChar(String name, int choice)
-    {
-        if (name.charAt(0) != '<') {
-            // not of the extended form, other lookups have to go on
-            return -2;
-        }
-        if (choice != UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
-            return -1;
-        }
-        final int closing = name.length() - 1;
-        if (name.charAt(closing) != '>') {
-            return -1;
-        }
-        final int dash = name.lastIndexOf('-');
-        if (dash < 0) {
-            return -1;
-        }
-
-        // We've got a category, the code point follows the last dash
-        int codepoint;
-        try {
-            codepoint = Integer.parseInt(name.substring(dash + 1, closing), 
-                                         16);
-        }
-        catch (NumberFormatException e) {
-            return -1;
-        }
-
-        // Now validate the category name. We could use a binary search, 
-        // or a trie, if we really wanted to.
-        final String type = name.substring(1, dash);
-        final int typeCount = UCharacterCategory.TYPE_NAMES_.length;
-        for (int i = 0; i < typeCount; ++ i) {
-            if (type.compareTo(UCharacterCategory.TYPE_NAMES_[i]) == 0) {
-                // only the first matching type name is checked
-                return (getType(codepoint) == i) ? codepoint : -1;
-            }
-        }
-        return -1;
-    }
-}
diff --git a/icu4j/src/com/ibm/icu/lang/UCharacterNameIterator.java b/icu4j/src/com/ibm/icu/lang/UCharacterNameIterator.java
index 796c98e9383..9146b279a4e 100644
--- a/icu4j/src/com/ibm/icu/lang/UCharacterNameIterator.java
+++ b/icu4j/src/com/ibm/icu/lang/UCharacterNameIterator.java
@@ -5,8 +5,8 @@
 ******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacterNameIterator.java,v $
-* $Date: 2002/04/05 01:38:15 $
-* $Revision: 1.3 $
+* $Date: 2002/09/19 21:18:14 $
+* $Revision: 1.4 $
 *
 ******************************************************************************
 */
@@ -14,6 +14,8 @@
 package com.ibm.icu.lang;
 
 import com.ibm.icu.util.ValueIterator;
+import com.ibm.icu.impl.UCharacterName;
+import com.ibm.icu.impl.UCharacterNameChoice;
 
 /**
  * <p>Class enabling iteration of the codepoints and their names.</p>
@@ -43,7 +45,7 @@ class UCharacterNameIterator implements ValueIterator
     		return false;
     	}
     	
-    	if (m_choice_ != UCharacterNameChoice.U_UNICODE_10_CHAR_NAME) {
+    	if (m_choice_ != UCharacterNameChoice.UNICODE_10_CHAR_NAME) {
     		int length = m_name_.getAlgorithmLength();
     		if (m_algorithmIndex_ < length) {
     			while (m_algorithmIndex_ < length) {
@@ -97,7 +99,7 @@ class UCharacterNameIterator implements ValueIterator
     		m_current_ ++;
     		return true;
     	}
-    	else if (m_choice_ == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
+    	else if (m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
     		if (!iterateExtended(element, m_limit_)) {
     			m_current_ ++;
     			return true;
@@ -238,7 +240,7 @@ class UCharacterNameIterator implements ValueIterator
         		                          index + GROUP_OFFSETS_[offset], 
         	 	                          GROUP_LENGTHS_[offset], m_choice_);
         		if ((name == null || name.length() == 0) && 
-          	 		m_choice_ == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
+          	 		m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
            			name = m_name_.getExtendedName(m_current_);
         		}
         		if (name != null && name.length() > 0) {
@@ -297,7 +299,7 @@ class UCharacterNameIterator implements ValueIterator
        			if (gMIN > limit) {
                		gMIN = limit;
             	}
-       			if (m_choice_ == UCharacterNameChoice.U_EXTENDED_CHAR_NAME) {
+       			if (m_choice_ == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
 					if (!iterateExtended(result, gMIN)) {
 		 				return false;
 					}