mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-5407 The UTF-32 converter is now supported by the ICU4J charset code.
X-SVN-Rev: 20485
This commit is contained in:
parent
ef584abc21
commit
820cc4cc73
3 changed files with 28 additions and 315 deletions
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
**************************************************************************
|
||||
* Copyright (C) 2005, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
* Copyright (C) 2005-2006, International Business Machines Corporation *
|
||||
* and others. All Rights Reserved. *
|
||||
**************************************************************************
|
||||
*
|
||||
*/
|
||||
|
@ -15,7 +15,6 @@ import java.net.URL;
|
|||
|
||||
import javax.swing.*;
|
||||
|
||||
import com.ibm.icu.impl.UTF32;
|
||||
import com.ibm.icu.text.CharsetDetector;
|
||||
import com.ibm.icu.text.CharsetMatch;
|
||||
|
||||
|
@ -96,12 +95,6 @@ public class DetectingViewer extends JFrame implements ActionListener
|
|||
return new BufferedInputStream(fileStream);
|
||||
}
|
||||
|
||||
private void openFile(String directory, String filename)
|
||||
{
|
||||
openFile(new File(directory, filename));
|
||||
}
|
||||
|
||||
|
||||
private BufferedInputStream openURL(String url)
|
||||
{
|
||||
InputStream s = null;
|
||||
|
@ -260,34 +253,14 @@ public class DetectingViewer extends JFrame implements ActionListener
|
|||
|
||||
inputStream.reset();
|
||||
|
||||
if (encoding.startsWith("UTF-32")) {
|
||||
byte[] bytes = new byte[1024];
|
||||
int offset = 0;
|
||||
int chBytes = 0;
|
||||
UTF32 utf32 = UTF32.getInstance(encoding);
|
||||
|
||||
while ((bytesRead = inputStream.read(bytes, offset, 1024)) >= 0) {
|
||||
offset = bytesRead % 4;
|
||||
chBytes = bytesRead - offset;
|
||||
|
||||
sb.append(utf32.fromBytes(bytes, 0, chBytes));
|
||||
|
||||
if (offset != 0) {
|
||||
for (int i = 0; i < offset; i += 1) {
|
||||
bytes[i] = bytes[chBytes + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
isr = new InputStreamReader(inputStream, encoding);
|
||||
|
||||
while ((bytesRead = isr.read(buffer, 0, 1024)) >= 0) {
|
||||
sb.append(buffer, 0, bytesRead);
|
||||
}
|
||||
|
||||
isr.close();
|
||||
isr = new InputStreamReader(inputStream, encoding);
|
||||
|
||||
while ((bytesRead = isr.read(buffer, 0, 1024)) >= 0) {
|
||||
sb.append(buffer, 0, bytesRead);
|
||||
}
|
||||
|
||||
isr.close();
|
||||
|
||||
this.setTitle(title + " - " + encodingName(matches[0]));
|
||||
|
||||
setMatchMenu(matches);
|
||||
|
|
|
@ -13,7 +13,6 @@ import java.io.Reader;
|
|||
import java.io.UnsupportedEncodingException;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.impl.UTF32;
|
||||
import com.ibm.icu.text.CharsetDetector;
|
||||
import com.ibm.icu.text.CharsetMatch;
|
||||
|
||||
|
@ -141,36 +140,30 @@ public class TestCharsetDetector extends TestFmwk
|
|||
CharsetDetector det = new CharsetDetector();
|
||||
byte[] bytes;
|
||||
|
||||
if (enc.startsWith("UTF-32")) {
|
||||
UTF32 utf32 = UTF32.getInstance(enc);
|
||||
|
||||
bytes = utf32.toBytes(testString);
|
||||
} else {
|
||||
String from = enc;
|
||||
String from = enc;
|
||||
|
||||
while (true) {
|
||||
try {
|
||||
bytes = testString.getBytes(from);
|
||||
} catch (UnsupportedOperationException uoe) {
|
||||
// In some runtimes, the ISO-2022-CN converter
|
||||
// only converts *to* Unicode - we have to use
|
||||
// x-ISO-2022-CN-GB to convert *from* Unicode.
|
||||
if (from.equals("ISO-2022-CN")) {
|
||||
from = "x-ISO-2022-CN-GB";
|
||||
continue;
|
||||
}
|
||||
|
||||
// Ignore any other converters that can't
|
||||
// convert from Unicode.
|
||||
return;
|
||||
} catch (UnsupportedEncodingException uee) {
|
||||
// Ignore any encodings that this runtime
|
||||
// doesn't support.
|
||||
return;
|
||||
while (true) {
|
||||
try {
|
||||
bytes = testString.getBytes(from);
|
||||
} catch (UnsupportedOperationException uoe) {
|
||||
// In some runtimes, the ISO-2022-CN converter
|
||||
// only converts *to* Unicode - we have to use
|
||||
// x-ISO-2022-CN-GB to convert *from* Unicode.
|
||||
if (from.equals("ISO-2022-CN")) {
|
||||
from = "x-ISO-2022-CN-GB";
|
||||
continue;
|
||||
}
|
||||
|
||||
break;
|
||||
// Ignore any other converters that can't
|
||||
// convert from Unicode.
|
||||
return;
|
||||
} catch (UnsupportedEncodingException uee) {
|
||||
// Ignore any encodings that this runtime
|
||||
// doesn't support.
|
||||
return;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
det.setText(bytes);
|
||||
|
|
|
@ -1,253 +0,0 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2005, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
*/
|
||||
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
import com.ibm.icu.text.UTF16;
|
||||
|
||||
/**
 * This class converts between an array of bytes in UTF-32 encoding (BE or LE) and
 * Java Strings. Each Unicode code point occupies exactly four bytes; the two
 * nested subclasses differ only in the byte order used by <code>pack</code>
 * and <code>unpack</code>.
 *
 * Obtain an instance through <code>getBEInstance</code>, <code>getLEInstance</code>
 * or <code>getInstance</code>.
 *
 * @internal
 */
public abstract class UTF32
{
    /** Number of bytes that encode a single code point in UTF-32. */
    private static final int BYTES_PER_CODE_POINT = 4;

    // Lazily created singletons. The implementations declare no instance
    // fields, so creating a duplicate under a race is harmless.
    private static UTF32 beInstance = null;
    private static UTF32 leInstance = null;

    /**
     * This method packs a 32-bit Unicode code point into the byte array. It is
     * implemented by subclasses that implement the BE and LE encodings.
     *
     * @param bytes the destination byte array
     * @param codePoint the 32-bit Unicode code point
     * @param out the destination index in <code>bytes</code>.
     *
     * @internal
     */
    protected abstract void pack(byte[] bytes, int codePoint, int out);

    /**
     * This method unpacks bytes from the encoded byte array into a 32-bit
     * Unicode code point. It is implemented by subclasses that implement
     * the BE and LE encodings.
     *
     * @param bytes the source byte array.
     * @param index the index of the first source byte.
     * @return the 32-bit Unicode code point.
     *
     * @internal
     */
    protected abstract int unpack(byte[] bytes, int index);

    /**
     * Convert a Java String into an array of UTF-32 encoded bytes. Calls
     * the <code>pack</code> method to do the encoding.
     *
     * A surrogate pair is encoded as one supplementary code point; an
     * unpaired surrogate is encoded as its own value.
     *
     * @param utf16 the source Java String; <code>null</code> is treated
     *              like the empty string.
     * @return an array of UTF-32 encoded bytes, four per code point.
     *
     * @internal
     */
    public byte[] toBytes(String utf16)
    {
        if (utf16 == null) {
            return new byte[0];
        }

        int length = utf16.length();
        int codePoints = utf16.codePointCount(0, length);
        byte[] bytes = new byte[codePoints * BYTES_PER_CODE_POINT];
        int out = 0;

        // "index" counts UTF-16 code units, so it must be bounded by the
        // string length, not by the number of code points: a supplementary
        // character advances it by two while producing one output group.
        for (int index = 0; index < length; out += BYTES_PER_CODE_POINT) {
            int codePoint = utf16.codePointAt(index);

            pack(bytes, codePoint, out);
            index += Character.charCount(codePoint);
        }

        return bytes;
    }

    /**
     * This method converts a sequence of UTF-32 encoded bytes into
     * a Java String. It calls the <code>unpack</code> method to implement
     * the encoding.
     *
     * @param bytes the source byte array.
     * @param offset the starting offset in the byte array.
     * @param count the number of bytes to process; a value of zero or less
     *              yields the empty string.
     * @return the Java String.
     * @throws IllegalArgumentException if <code>count</code> is positive but
     *         not a multiple of four, or if a decoded value is not a valid
     *         Unicode code point.
     *
     * @internal
     */
    public String fromBytes(byte[] bytes, int offset, int count)
    {
        if (count <= 0) {
            return "";
        }

        // A partial trailing group would make unpack read bytes beyond the
        // range the caller asked for, so reject it up front.
        if (count % BYTES_PER_CODE_POINT != 0) {
            throw new IllegalArgumentException(
                "UTF-32 byte count must be a multiple of 4: " + count);
        }

        StringBuilder buffer = new StringBuilder(count / BYTES_PER_CODE_POINT);
        int limit = offset + count;

        for (int in = offset; in < limit; in += BYTES_PER_CODE_POINT) {
            buffer.appendCodePoint(unpack(bytes, in));
        }

        return buffer.toString();
    }

    /**
     * A convenience method that converts an entire byte array
     * into a Java String.
     *
     * @param bytes the source byte array.
     * @return the Java String.
     * @throws IllegalArgumentException if the array length is not a multiple
     *         of four, or if a decoded value is not a valid Unicode code point.
     *
     * @internal
     */
    public String fromBytes(byte[] bytes)
    {
        return fromBytes(bytes, 0, bytes.length);
    }

    /**
     * Get an instance that implements the UTF-32BE encoding.
     *
     * @return the instance.
     *
     * @internal
     */
    public static UTF32 getBEInstance()
    {
        if (beInstance == null) {
            beInstance = new BE();
        }

        return beInstance;
    }

    /**
     * Get an instance that implements the UTF-32LE encoding.
     *
     * @return the instance.
     *
     * @internal
     */
    public static UTF32 getLEInstance()
    {
        if (leInstance == null) {
            leInstance = new LE();
        }

        return leInstance;
    }

    /**
     * Get an instance that implements either UTF-32BE or UTF-32LE,
     * depending on the encoding name supplied.
     *
     * @param encoding the encoding name - must be <code>"UTF-32BE"</code> or <code>"UTF-32LE"</code>.
     * @return the instance, or <code>null</code> if <code>encoding</code> is
     *         neither of the two supported names.
     *
     * @internal
     */
    public static UTF32 getInstance(String encoding)
    {
        if (encoding.equals("UTF-32BE")) {
            return getBEInstance();
        }

        if (encoding.equals("UTF-32LE")) {
            return getLEInstance();
        }

        // Callers must check for null: a bare "UTF-32" (no byte order) is
        // not recognized here.
        return null;
    }

    /**
     * This subclass implements the UTF-32BE encoding via the
     * <code>pack</code> and <code>unpack</code> methods.
     *
     * @internal
     */
    static class BE extends UTF32
    {
        /**
         * This method packs a 32-bit Unicode code point into the byte array using
         * the UTF-32BE encoding (most significant byte first).
         *
         * @param bytes the destination byte array
         * @param codePoint the 32-bit Unicode code point
         * @param out the destination index in <code>bytes</code>.
         *
         * @internal
         */
        @Override
        public void pack(byte[] bytes, int codePoint, int out)
        {
            bytes[out + 0] = (byte) ((codePoint >> 24) & 0xFF);
            bytes[out + 1] = (byte) ((codePoint >> 16) & 0xFF);
            bytes[out + 2] = (byte) ((codePoint >>  8) & 0xFF);
            bytes[out + 3] = (byte) ((codePoint >>  0) & 0xFF);
        }

        /**
         * This method unpacks bytes from the UTF-32BE encoded byte array into a 32-bit
         * Unicode code point.
         *
         * @param bytes the source byte array.
         * @param index the index of the first source byte.
         * @return the 32-bit Unicode code point.
         *
         * @internal
         */
        @Override
        public int unpack(byte[] bytes, int index)
        {
            // Mask each byte to undo sign extension before shifting it into place.
            return ((bytes[index + 0] & 0xFF) << 24)
                 | ((bytes[index + 1] & 0xFF) << 16)
                 | ((bytes[index + 2] & 0xFF) <<  8)
                 |  (bytes[index + 3] & 0xFF);
        }
    }

    /**
     * This subclass implements the UTF-32LE encoding via the
     * <code>pack</code> and <code>unpack</code> methods.
     *
     * @internal
     */
    static class LE extends UTF32
    {
        /**
         * This method packs a 32-bit Unicode code point into the byte array using
         * the UTF-32LE encoding (least significant byte first).
         *
         * @param bytes the destination byte array
         * @param codePoint the 32-bit Unicode code point
         * @param out the destination index in <code>bytes</code>.
         *
         * @internal
         */
        @Override
        public void pack(byte[] bytes, int codePoint, int out)
        {
            bytes[out + 3] = (byte) ((codePoint >> 24) & 0xFF);
            bytes[out + 2] = (byte) ((codePoint >> 16) & 0xFF);
            bytes[out + 1] = (byte) ((codePoint >>  8) & 0xFF);
            bytes[out + 0] = (byte) ((codePoint >>  0) & 0xFF);
        }

        /**
         * This method unpacks bytes from the UTF-32LE encoded byte array into a 32-bit
         * Unicode code point.
         *
         * @param bytes the source byte array.
         * @param index the index of the first source byte.
         * @return the 32-bit Unicode code point.
         *
         * @internal
         */
        @Override
        public int unpack(byte[] bytes, int index)
        {
            // Mask each byte to undo sign extension before shifting it into place.
            return ((bytes[index + 3] & 0xFF) << 24)
                 | ((bytes[index + 2] & 0xFF) << 16)
                 | ((bytes[index + 1] & 0xFF) <<  8)
                 |  (bytes[index + 0] & 0xFF);
        }
    }
}
|
Loading…
Add table
Reference in a new issue