ICU-5407 The UTF-32 converter is now supported by the ICU4J charset code.

X-SVN-Rev: 20485
2025-04-08 06:53:45 +00:00 · 2006-10-04 21:46:41 +00:00 · 2006-10-04 21:46:41 +00:00 · 820cc4cc73
commit 820cc4cc73
parent ef584abc21
3 changed files with 28 additions and 315 deletions
--- a/icu4j/src/com/ibm/icu/dev/demo/charsetdet/DetectingViewer.java
+++ b/icu4j/src/com/ibm/icu/dev/demo/charsetdet/DetectingViewer.java
@@ -1,7 +1,7 @@
 /*
 **************************************************************************
- * Copyright (C) 2005, International Business Machines Corporation and    *
- * others. All Rights Reserved.                                           *
+ * Copyright (C) 2005-2006, International Business Machines Corporation   *
+ * and others. All Rights Reserved.                                       *
 **************************************************************************
 *
 */
@@ -15,7 +15,6 @@ import java.net.URL;

 import javax.swing.*;

-import com.ibm.icu.impl.UTF32;
 import com.ibm.icu.text.CharsetDetector;
 import com.ibm.icu.text.CharsetMatch;

@@ -96,12 +95,6 @@ public class DetectingViewer extends JFrame implements ActionListener
        return new BufferedInputStream(fileStream);
    }
    
-    private void openFile(String directory, String filename)
-    {
-        openFile(new File(directory, filename));
-    }
-    
-    
    private BufferedInputStream openURL(String url)
    {
        InputStream s = null;
@@ -260,34 +253,14 @@ public class DetectingViewer extends JFrame implements ActionListener
            
            inputStream.reset();
            
-            if (encoding.startsWith("UTF-32")) {
-                byte[] bytes = new byte[1024];
-                int offset = 0;
-                int chBytes = 0;
-                UTF32 utf32 = UTF32.getInstance(encoding);
-                
-                while ((bytesRead = inputStream.read(bytes, offset, 1024)) >= 0) {
-                    offset  = bytesRead % 4;
-                    chBytes = bytesRead - offset;
-                    
-                    sb.append(utf32.fromBytes(bytes, 0, chBytes));
-                    
-                    if (offset != 0) {
-                        for (int i = 0; i < offset; i += 1) {
-                            bytes[i] = bytes[chBytes + i];
-                        }
-                    }
-                }
-            } else {
-                isr = new InputStreamReader(inputStream, encoding);
-                
-                while ((bytesRead = isr.read(buffer, 0, 1024)) >= 0) {
-                    sb.append(buffer, 0, bytesRead);
-                }
-                
-                isr.close();
+            isr = new InputStreamReader(inputStream, encoding);
+            
+            while ((bytesRead = isr.read(buffer, 0, 1024)) >= 0) {
+                sb.append(buffer, 0, bytesRead);
            }
            
+            isr.close();
+            
            this.setTitle(title + " - " + encodingName(matches[0]));
            
            setMatchMenu(matches);
--- a/icu4j/src/com/ibm/icu/dev/test/charsetdet/TestCharsetDetector.java
+++ b/icu4j/src/com/ibm/icu/dev/test/charsetdet/TestCharsetDetector.java
@@ -13,7 +13,6 @@ import java.io.Reader;
 import java.io.UnsupportedEncodingException;

 import com.ibm.icu.dev.test.TestFmwk;
-import com.ibm.icu.impl.UTF32;
 import com.ibm.icu.text.CharsetDetector;
 import com.ibm.icu.text.CharsetMatch;

@@ -141,36 +140,30 @@ public class TestCharsetDetector extends TestFmwk
            CharsetDetector det = new CharsetDetector();
            byte[] bytes;
            
-            if (enc.startsWith("UTF-32")) {
-                UTF32 utf32 = UTF32.getInstance(enc);
-                
-                bytes = utf32.toBytes(testString);
-            } else {
-                String from = enc;
+            String from = enc;

-                while (true) {
-                    try {
-                        bytes = testString.getBytes(from);
-                    } catch (UnsupportedOperationException uoe) {
-                         // In some runtimes, the ISO-2022-CN converter
-                         // only converts *to* Unicode - we have to use
-                         // x-ISO-2022-CN-GB to convert *from* Unicode.
-                        if (from.equals("ISO-2022-CN")) {
-                            from = "x-ISO-2022-CN-GB";
-                            continue;
-                        }
-                        
-                        // Ignore any other converters that can't
-                        // convert from Unicode.
-                        return;
-                    } catch (UnsupportedEncodingException uee) {
-                        // Ignore any encodings that this runtime
-                        // doesn't support.
-                        return;
+            while (true) {
+                try {
+                    bytes = testString.getBytes(from);
+                } catch (UnsupportedOperationException uoe) {
+                     // In some runtimes, the ISO-2022-CN converter
+                     // only converts *to* Unicode - we have to use
+                     // x-ISO-2022-CN-GB to convert *from* Unicode.
+                    if (from.equals("ISO-2022-CN")) {
+                        from = "x-ISO-2022-CN-GB";
+                        continue;
                    }
                    
-                    break;
+                    // Ignore any other converters that can't
+                    // convert from Unicode.
+                    return;
+                } catch (UnsupportedEncodingException uee) {
+                    // Ignore any encodings that this runtime
+                    // doesn't support.
+                    return;
                }
+                
+                break;
            }
        
            det.setText(bytes);
--- a/icu4j/src/com/ibm/icu/impl/UTF32.java
+++ b/icu4j/src/com/ibm/icu/impl/UTF32.java
@@ -1,253 +0,0 @@
-/*
- *******************************************************************************
- * Copyright (C) 2005, International Business Machines Corporation and         *
- * others. All Rights Reserved.                                                *
- *******************************************************************************
- *
- */
-
-package com.ibm.icu.impl;
-
-import com.ibm.icu.text.UTF16;
-
-/**
- * This class converts between an array of bytes in UTF-32 encoding (BE or LE) and
- * Java Strings.
- * 
- * @internal
- */
-public abstract class UTF32
-{
-    /**
-     * This method packs a 32-bit Unicode code point into the byte array. It is
-     * implemented by subclasses that implement the BE and LE encodings.
-     * 
-     * @param bytes the destination byte array
-     * @param codePoint the 32-bit Unicode code point
-     * @param out the destination index in <code>bytes</code>.
-     * 
-     * @internal
-     */
-    abstract protected void pack(byte[] bytes, int codePoint, int out);
-    
-    /**
-     * This method unpacks bytes from the encoded byte array into a 32-bit
-     * Unicode code point. It is implmeented by subclasses that implmeent the BE and LE encodings.
-     * 
-     * @param bytes the source byte array.
-     * @param index the index of the first source byte.
-     * @return the 32-bit Unicode code point.
-     * 
-     * @internal
-     */
-    abstract protected int unpack(byte[] bytes, int index);
-    
-    
-    /**
-     * Convert a Java String into an array of UTF-32 encoded bytes. Calls
-     * the <code>pack</code> method to do the encoding.
-     * 
-     * @param utf16 the source Java String.
-     * @return an array of UTF-32 encoded bytes.
-     * 
-     * @internal
-     */
-    public byte[] toBytes(String utf16)
-    {
-        int codePoints = UTF16.countCodePoint(utf16);
-        byte[] bytes = new byte[codePoints * 4];
-        int out = 0;
-
-        for (int cp = 0; cp < codePoints; out += 4) {
-            int codePoint = UTF16.charAt(utf16, cp);
-            
-            pack(bytes, codePoint, out);
-            cp += UTF16.getCharCount(codePoint);
-        }
-        
-        return bytes;
-    }
-    
-    /**
-     * This method converts a sequence of UTF-32 encoded bytes into
-     * a Java String. It calls the <code>unpack</code> method to implement
-     * the encoding.
-     * 
-     * @param bytes the source byte array.
-     * @param offset the starting offset in the byte array.
-     * @param count the number of bytes to process.
-     * @return the Java String.
-     * 
-     * @internal
-     */
-    public String fromBytes(byte[] bytes, int offset, int count)
-    {
-        StringBuffer buffer = new StringBuffer();
-        int limit = offset + count;
-        
-        for (int cp = offset; cp < limit; cp += 4) {
-            int codePoint = unpack(bytes, cp);
-            
-            UTF16.append(buffer, codePoint);
-        }
-        
-        return buffer.toString();
-    }
-    
-    /**
-     * A convenience method that converts an entire byte array
-     * into a Java String.
-     * 
-     * @param bytes the source byte array.
-     * @return the Java String.
-     * 
-     * @internal
-     */
-    public String fromBytes(byte[] bytes)
-    {
-        return fromBytes(bytes, 0, bytes.length);
-    }
-    
-    /**
-     * Get an instance that implements UTF-32BE encoding.
-     * 
-     * @return the instance.
-     * 
-     * @internal
-     */
-    static public UTF32 getBEInstance()
-    {
-        if (beInstance == null) {
-            beInstance = new BE();
-        }
-        
-        return beInstance;
-    }
-    
-    /**
-     * Get an instance that implemnts the UTF-32LE encoding.
-     * 
-     * @return the instance.
-     * 
-     * @internal
-     */
-    static public UTF32 getLEInstance()
-    {
-        if (leInstance == null) {
-            leInstance = new LE();
-        }
-        
-        return leInstance;
-    }
-    
-    /**
-     * Get an instance that implements either UTF-32BE or UTF32-LE,
-     * depending on the encoding name suppled.
-     * 
-     * @param encoding the encoding name - must be <code>"UTF-32BE"</code> or <code>"UTF-32LE"</code>.
-     * @return the instance.
-     * 
-     * @internal
-     */
-    static public UTF32 getInstance(String encoding)
-    {
-        if (encoding.equals("UTF-32BE")) {
-            return getBEInstance();
-        }
-        
-        if (encoding.equals("UTF-32LE")) {
-            return getLEInstance();
-        }
-        
-        return null;
-    }
-    
-    /**
-     * This sublcass implements the UTF-32BE encoding via the
-     * <code>pack</code> and <code>unpack</code> methods.
-     * 
-     * @internal
-     */
-    static class BE extends UTF32
-    {
-        /**
-         * This method packs a 32-bit Unicode code point into the byte array using
-         * the UTF-32BE encoding.
-         * 
-         * @param bytes the destination byte array
-         * @param codePoint the 32-bit Unicode code point
-         * @param out the destination index in <code>bytes</code>.
-         * 
-         * @internal
-         */
-        public void pack(byte[] bytes, int codePoint, int out)
-        {
-            bytes[out + 0] = (byte) ((codePoint >> 24) & 0xFF);
-            bytes[out + 1] = (byte) ((codePoint >> 16) & 0xFF);
-            bytes[out + 2] = (byte) ((codePoint >>  8) & 0xFF);
-            bytes[out + 3] = (byte) ((codePoint >>  0) & 0xFF);
-        }
-        
-        /**
-         * This method unpacks bytes from the UTF-32BE encoded byte array into a 32-bit
-         * Unicode code point.
-         * 
-         * @param bytes the source byte array.
-         * @param index the index of the first source byte.
-         * @return the 32-bit Unicode code point.
-         * 
-         * @internal
-         */
-        public int unpack(byte[] bytes, int index)
-        {
-            return (bytes[index + 0] & 0xFF) << 24 | (bytes[index + 1] & 0xFF) << 16 |
-                   (bytes[index + 2] & 0xFF) <<  8 | (bytes[index + 3] & 0xFF);
-        }
-    }
-    
-    /**
-     * This sublcass implements the UTF-32LE encoding via the
-     * <code>pack</code> and <code>unpack</code> methods.
-     * 
-     * @internal
-     */
-    static class LE extends UTF32
-    {
-        /**
-         * This method packs a 32-bit Unicode code point into the byte array using
-         * the UTF-32LE encoding.
-         * 
-         * @param bytes the destination byte array
-         * @param codePoint the 32-bit Unicode code point
-         * @param out the destination index in <code>bytes</code>.
-         * 
-         * @internal
-         */
-        public void pack(byte[] bytes, int codePoint, int out)
-        {
-            bytes[out + 3] = (byte) ((codePoint >> 24) & 0xFF);
-            bytes[out + 2] = (byte) ((codePoint >> 16) & 0xFF);
-            bytes[out + 1] = (byte) ((codePoint >>  8) & 0xFF);
-            bytes[out + 0] = (byte) ((codePoint >>  0) & 0xFF);
-        }
-        
-        /**
-         * This method unpacks bytes from the UTF-32LE encoded byte array into a 32-bit
-         * Unicode code point.
-         * 
-         * @param bytes the source byte array.
-         * @param index the index of the first source byte.
-         * @return the 32-bit Unicode code point.
-         * 
-         * @internal
-         */
-        public int unpack(byte[] bytes, int index)
-        {
-            return (bytes[index + 3] & 0xFF) << 24 | (bytes[index + 2] & 0xFF) << 16 |
-                   (bytes[index + 1] & 0xFF) <<  8 | (bytes[index + 0] & 0xFF);
-        }
-    }
-    
-    private static UTF32 beInstance = null;
-    private static UTF32 leInstance = null;
-}