mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-5018 fix all failures
X-SVN-Rev: 20232
This commit is contained in:
parent
e2eedc222d
commit
74a0af671d
9 changed files with 261 additions and 107 deletions
|
@ -14,17 +14,37 @@ import java.nio.CharBuffer;
|
|||
import java.nio.IntBuffer;
|
||||
import java.nio.charset.CoderResult;
|
||||
|
||||
/**
|
||||
* <h2> Callback API for CharsetICU API </h2>
|
||||
*
|
||||
* CharsetCallback class defines some error behaviour functions called
|
||||
* by CharsetDecoderICU and CharsetEncoderICU. The class also provides
|
||||
* the facility by which clients can write their own callbacks.
|
||||
*
|
||||
* These functions, although public, should NEVER be called directly.
|
||||
* They should be used as parameters to the onUnmappableCharacter() and
|
||||
* onMalformedInput() methods, to set the behaviour of a converter
|
||||
* when it encounters UNMAPPED/INVALID sequences.
|
||||
* Currently the only way to set callbacks is by using CodingErrorAction.
|
||||
* In the future we will provide set methods on CharsetEncoder and CharsetDecoder
|
||||
* that will accept CharsetCallback fields.
|
||||
*
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
|
||||
/*public*/ class CharsetCallback {
|
||||
/**
|
||||
* FROM_U, TO_U context options for sub callback
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
/*public*/ static final String SUB_STOP_ON_ILLEGAL = "i";
|
||||
|
||||
/**
|
||||
* FROM_U, TO_U context options for skip callback
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
/*public*/ static final String SKIP_STOP_ON_ILLEGAL = "i";
|
||||
|
||||
|
@ -42,37 +62,55 @@ import java.nio.charset.CoderResult;
|
|||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
|
||||
* TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
/*public*/ static final String ESCAPE_C = "C";
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
|
||||
* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
/*public*/ static final String ESCAPE_XML_DEC = "D";
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
|
||||
* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
/*public*/ static final String ESCAPE_XML_HEX = "X";
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
/*public*/ static final String ESCAPE_UNICODE = "U";
|
||||
|
||||
/**
|
||||
* Decoder Callback interface
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public interface Decoder {
|
||||
// Single callback entry point. It receives the decoder that hit the problem, a
// caller-supplied context object, the source bytes, the target chars, an offsets
// buffer and the CoderResult in effect, and returns a CoderResult.
// NOTE(review): the meaning of buffer/length is not visible in this hunk --
// presumably the offending input units and their count; confirm against
// CharsetDecoderICU.
public CoderResult call(CharsetDecoderICU decoder, Object context,
|
||||
ByteBuffer source, CharBuffer target, IntBuffer offsets,
|
||||
char[] buffer, int length, CoderResult cr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encoder Callback interface
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public interface Encoder {
|
||||
// Single callback entry point. It receives the encoder that hit the problem, a
// caller-supplied context object, the source chars, the target bytes, an offsets
// buffer, a code point (cp) and the CoderResult in effect, and returns a
// CoderResult.
// NOTE(review): the meaning of buffer/length is not visible in this hunk --
// presumably the offending UTF-16 units and their count, with cp the code point
// they form; confirm against CharsetEncoderICU.
public CoderResult call(CharsetEncoderICU encoder, Object context,
|
||||
CharBuffer source, ByteBuffer target, IntBuffer offsets,
|
||||
char[] buffer, int length, int cp, CoderResult cr);
|
||||
}
|
||||
/**
|
||||
* Skip callback
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final Encoder FROM_U_CALLBACK_SKIP = new Encoder() {
|
||||
public CoderResult call(CharsetEncoderICU encoder, Object context,
|
||||
CharBuffer source, ByteBuffer target, IntBuffer offsets,
|
||||
|
@ -89,6 +127,11 @@ import java.nio.charset.CoderResult;
|
|||
return cr;
|
||||
}
|
||||
};
|
||||
/**
|
||||
* Skip callback
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final Decoder TO_U_CALLBACK_SKIP = new Decoder() {
|
||||
public CoderResult call(CharsetDecoderICU decoder, Object context,
|
||||
ByteBuffer source, CharBuffer target, IntBuffer offsets,
|
||||
|
@ -105,7 +148,11 @@ import java.nio.charset.CoderResult;
|
|||
return cr;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Substitute callback
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final Encoder FROM_U_CALLBACK_SUBSTITUTE = new Encoder(){
|
||||
public CoderResult call(CharsetEncoderICU encoder, Object context,
|
||||
CharBuffer source, ByteBuffer target, IntBuffer offsets,
|
||||
|
@ -122,7 +169,11 @@ import java.nio.charset.CoderResult;
|
|||
return cr;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Substitute callback
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final Decoder TO_U_CALLBACK_SUBSTITUTE = new Decoder() {
|
||||
public CoderResult call(CharsetDecoderICU decoder, Object context,
|
||||
ByteBuffer source, CharBuffer target, IntBuffer offsets,
|
||||
|
@ -140,7 +191,11 @@ import java.nio.charset.CoderResult;
|
|||
return cr;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Stop callback
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final Encoder FROM_U_CALLBACK_STOP = new Encoder() {
|
||||
public CoderResult call(CharsetEncoderICU encoder, Object context,
|
||||
CharBuffer source, ByteBuffer target, IntBuffer offsets,
|
||||
|
@ -148,6 +203,11 @@ import java.nio.charset.CoderResult;
|
|||
return cr;
|
||||
}
|
||||
};
|
||||
/**
|
||||
* Stop callback
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final Decoder TO_U_CALLBACK_STOP = new Decoder() {
|
||||
public CoderResult call(CharsetDecoderICU decoder, Object context,
|
||||
ByteBuffer source, CharBuffer target, IntBuffer offsets,
|
||||
|
|
|
@ -18,8 +18,16 @@ import java.nio.charset.CodingErrorAction;
|
|||
import java.nio.charset.MalformedInputException;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
import com.ibm.icu.charset.CharsetCallback;
|
||||
import com.ibm.icu.impl.Assert;
|
||||
|
||||
/**
|
||||
* An abstract class that provides framework implementation for concrete sub class implementations
|
||||
* to utilize. In the future this class will contain API that will implement converter semantics of ICU4C.
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
|
||||
public abstract class CharsetDecoderICU extends CharsetDecoder{
|
||||
|
||||
protected int toUnicodeStatus;
|
||||
|
@ -156,6 +164,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
|
|||
* @param in
|
||||
* @param out
|
||||
* @return
|
||||
* @draft ICU 3.6
|
||||
*/
|
||||
protected abstract CoderResult decodeLoop(ByteBuffer in, CharBuffer out, IntBuffer offsets);
|
||||
|
||||
|
@ -167,6 +176,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
|
|||
* @param flush
|
||||
* @return
|
||||
* @throws MalformedInputException
|
||||
* @draft ICU 3.6
|
||||
*/
|
||||
protected final CoderResult decode(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
|
||||
|
||||
|
@ -551,7 +561,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
|
|||
* @param status ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return The number of chars in the state. -1 if an error is encountered.
|
||||
* @draft ICU 3.4
|
||||
* @draft ICU 3.6
|
||||
*/
|
||||
/*public*/ int toUCountPending() {
|
||||
if(preToULength > 0){
|
||||
|
|
|
@ -21,7 +21,12 @@ import java.nio.charset.MalformedInputException;
|
|||
import com.ibm.icu.impl.Assert;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
|
||||
|
||||
/**
|
||||
* An abstract class that provides framework implementation for concrete sub class implementations
|
||||
* to utilize. In the future this class will contain API that will implement converter semantics of ICU4C.
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public abstract class CharsetEncoderICU extends CharsetEncoder {
|
||||
|
||||
protected byte[] errorBuffer = new byte[30];
|
||||
|
@ -522,18 +527,13 @@ public abstract class CharsetEncoderICU extends CharsetEncoder {
|
|||
public boolean canEncode(int codepoint) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Overrides super class method
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
public boolean isLegalReplacement(byte[] repl){
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Releases the system resources by cleanly closing ICU converter opened
|
||||
* @exception Throwable exception thrown by super class' finalize method
|
||||
* @draft ICU 3.6
|
||||
*/
|
||||
protected void finalize() throws Throwable {
|
||||
}
|
||||
|
||||
protected static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv,
|
||||
byte[] bytesArray, int bytesBegin, int bytesLength,
|
||||
|
|
|
@ -12,6 +12,7 @@ package com.ibm.icu.charset;
|
|||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.lang.reflect.Constructor;
|
||||
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.IllegalCharsetNameException;
|
||||
|
@ -19,30 +20,56 @@ import java.nio.charset.UnsupportedCharsetException;
|
|||
import java.util.HashMap;
|
||||
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* <p>This API is used to convert codepage or character encoded data to and
|
||||
* from UTF-16. You can open a converter with {@link forName() } and {@link forNameICU() }. With that
|
||||
* converter, you can get its properties, set options, convert your data.</p>
|
||||
*
|
||||
* <p>Since many software programs recognize different converter names for
|
||||
* different types of converters, there are other functions in this API to
|
||||
* iterate over the converter aliases.
|
||||
*
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public abstract class CharsetICU extends Charset{
|
||||
|
||||
protected String icuCanonicalName;
|
||||
protected String javaCanonicalName;
|
||||
protected int options;
|
||||
|
||||
protected int maxBytesPerChar;
|
||||
protected int minBytesPerChar;
|
||||
protected float maxCharsPerByte;
|
||||
protected byte subChar1 = 0x00;
|
||||
|
||||
protected int mode;
|
||||
protected boolean flush;
|
||||
protected boolean useFallback;
|
||||
|
||||
protected String name; /* +4: 60 internal name of the converter- invariant chars */
|
||||
|
||||
protected int codepage; /* +64: 4 codepage # (now IBM-$codepage) */
|
||||
|
||||
protected byte platform; /* +68: 1 platform of the converter (only IBM now) */
|
||||
protected byte conversionType; /* +69: 1 conversion type */
|
||||
|
||||
protected int minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */
|
||||
protected int maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */
|
||||
|
||||
protected byte subChar[/*UCNV_MAX_SUBCHAR_LEN*/]; /* +72: 4 [note: 4 and 8 byte boundary] */
|
||||
protected byte subCharLen; /* +76: 1 */
|
||||
|
||||
protected byte hasToUnicodeFallback; /* +77: 1 UBool needs to be changed to UBool to be consistent across platform */
|
||||
protected byte hasFromUnicodeFallback; /* +78: 1 */
|
||||
protected short unicodeMask; /* +79: 1 bit 0: has supplementary bit 1: has single surrogates */
|
||||
protected byte subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */
|
||||
protected byte reserved[/*19*/]; /* +81: 19 to round out the structure */
|
||||
|
||||
/**
|
||||
*
|
||||
* @param icuCanonicalName
|
||||
* @param canonName
|
||||
* @param aliases
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
protected CharsetICU(String icuCanonicalName, String canonicalName, String[] aliases) {
|
||||
super(canonicalName,aliases);
|
||||
|
@ -55,8 +82,10 @@ public abstract class CharsetICU extends Charset{
|
|||
|
||||
/**
|
||||
* Ascertains if a charset is a sub set of this charset
|
||||
* Implements the abstract method of super class.
|
||||
* @param cs charset to test
|
||||
* @return true if the given charset is a subset of this charset
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
public boolean contains(Charset cs){
|
||||
if (null == cs) {
|
||||
|
@ -68,14 +97,13 @@ public abstract class CharsetICU extends Charset{
|
|||
}
|
||||
private static final HashMap algorithmicCharsets = new HashMap();
|
||||
static{
|
||||
algorithmicCharsets.put("BOCU-1", "com.ibm.icu.impl.CharsetBOCU1" );
|
||||
/*algorithmicCharsets.put("BOCU-1", "com.ibm.icu.impl.CharsetBOCU1" );
|
||||
algorithmicCharsets.put("CESU-8", "com.ibm.icu.impl.CharsetCESU8" );
|
||||
algorithmicCharsets.put("HZ", "com.ibm.icu.impl.CharsetHZ" );
|
||||
algorithmicCharsets.put("imapmailboxname", "com.ibm.icu.impl.CharsetIMAP" );
|
||||
algorithmicCharsets.put("ISCII", "com.ibm.icu.impl.CharsetISCII" );
|
||||
algorithmicCharsets.put("iso2022", "com.ibm.icu.impl.CharsetISO2022" );
|
||||
algorithmicCharsets.put("iso88591", "com.ibm.icu.impl.CharsetBOCU1" );
|
||||
algorithmicCharsets.put("lmbcs1", "com.ibm.icu.impl.CharsetLMBCS1" );
|
||||
algorithmicCharsets.put("iso2022", "com.ibm.icu.impl.CharsetISO2022" );*/
|
||||
/*algorithmicCharsets.put("lmbcs1", "com.ibm.icu.impl.CharsetLMBCS1" );
|
||||
algorithmicCharsets.put("lmbcs11", "com.ibm.icu.impl.CharsetLMBCS11" );
|
||||
algorithmicCharsets.put("lmbcs16", "com.ibm.icu.impl.CharsetLMBCS16" );
|
||||
algorithmicCharsets.put("lmbcs17", "com.ibm.icu.impl.CharsetLMBCS17" );
|
||||
|
@ -86,9 +114,10 @@ public abstract class CharsetICU extends Charset{
|
|||
algorithmicCharsets.put("lmbcs4", "com.ibm.icu.impl.CharsetLMBCS4" );
|
||||
algorithmicCharsets.put("lmbcs5", "com.ibm.icu.impl.CharsetLMBCS5" );
|
||||
algorithmicCharsets.put("lmbcs6", "com.ibm.icu.impl.CharsetLMBCS6" );
|
||||
algorithmicCharsets.put("lmbcs8", "com.ibm.icu.impl.CharsetLMBCS8" );
|
||||
algorithmicCharsets.put("scsu", "com.ibm.icu.impl.CharsetSCSU" );
|
||||
algorithmicCharsets.put("usascii", "com.ibm.icu.impl.CharsetUSASCII" );
|
||||
algorithmicCharsets.put("lmbcs8", "com.ibm.icu.impl.CharsetLMBCS8" )
|
||||
algorithmicCharsets.put("scsu", "com.ibm.icu.impl.CharsetSCSU" ); */
|
||||
algorithmicCharsets.put("US-ASCII", "com.ibm.icu.impl.CharsetASCII" );
|
||||
algorithmicCharsets.put("ISO-8859-1", "com.ibm.icu.impl.Charset88591" );
|
||||
algorithmicCharsets.put("UTF-16", "com.ibm.icu.impl.CharsetUTF16" );
|
||||
algorithmicCharsets.put("UTF-16BE", "com.ibm.icu.impl.CharsetUTF16" );
|
||||
algorithmicCharsets.put("UTF-16LE", "com.ibm.icu.impl.CharsetUTF16LE" );
|
||||
|
@ -153,7 +182,9 @@ public abstract class CharsetICU extends Charset{
|
|||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Returns the default charset name
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final String getDefaultCharsetName(){
|
||||
String defaultEncoding = new InputStreamReader(new ByteArrayInputStream(new byte[0])).getEncoding();
|
||||
|
@ -179,6 +210,8 @@ public abstract class CharsetICU extends Charset{
|
|||
* @throws UnsupportedCharsetException If no support for the
|
||||
* named charset is available in this instance of the Java
|
||||
* virtual machine
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static Charset forNameICU(String charsetName) throws IllegalCharsetNameException, UnsupportedCharsetException {
|
||||
CharsetProviderICU icuProvider = new CharsetProviderICU();
|
||||
|
|
|
@ -20,20 +20,21 @@ import java.util.NoSuchElementException;
|
|||
|
||||
import com.ibm.icu.impl.UConverterAlias;
|
||||
|
||||
/**
|
||||
* A concrete subclass of CharsetProvider for loading and providing charset converters
|
||||
* in ICU.
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*
|
||||
*/
|
||||
public final class CharsetProviderICU extends CharsetProvider{
|
||||
|
||||
/**
|
||||
* Constructs a CharsetProviderICU object
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
public CharsetProviderICU(){
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a charset for the given charset name
|
||||
* Constructs a charset for the given charset name.
|
||||
* Implements the abstract method of super class.
|
||||
* @param charsetName charset name
|
||||
* @return charset object for the given charset name, null if unsupported
|
||||
* @stable ICU 2.4
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
public final Charset charsetForName(String charsetName){
|
||||
try{
|
||||
|
@ -57,6 +58,7 @@ public final class CharsetProviderICU extends CharsetProvider{
|
|||
* @param enc converter name
|
||||
* @return canonical name of the converter
|
||||
* @internal ICU 3.4
|
||||
* @deprecated This API is for internal ICU use only
|
||||
*/
|
||||
public static final String getICUCanonicalName(String enc)
|
||||
throws UnsupportedCharsetException{
|
||||
|
@ -228,8 +230,10 @@ public final class CharsetProviderICU extends CharsetProvider{
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns an iterator for the available charsets
|
||||
* Returns an iterator for the available charsets.
|
||||
* Implements the abstract method of super class.
|
||||
* @return Iterator the charset name iterator
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
public final Iterator charsets(){
|
||||
HashMap map = new HashMap();
|
||||
|
@ -239,6 +243,8 @@ public final class CharsetProviderICU extends CharsetProvider{
|
|||
/**
|
||||
* Gets the canonical names of available converters
|
||||
* @return Object[] names as an object array
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final Object[] getAvailableNames(){
|
||||
HashMap map = new HashMap();
|
||||
|
@ -247,7 +253,9 @@ public final class CharsetProviderICU extends CharsetProvider{
|
|||
}
|
||||
/**
|
||||
* Return all names available
|
||||
* @return
|
||||
* @return String[] an array of all available names
|
||||
* @draft ICU 3.6
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final String[] getAllNames(){
|
||||
int num = UConverterAlias.countAvailable();
|
||||
|
|
|
@ -9,12 +9,19 @@
|
|||
|
||||
package com.ibm.icu.dev.test.charset;
|
||||
|
||||
import java.nio.*;
|
||||
import java.nio.charset.spi.*;
|
||||
import java.nio.charset.*;
|
||||
import java.util.*;
|
||||
|
||||
//import sun.misc.ASCIICaseInsensitiveComparator;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.CharacterCodingException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
import java.nio.charset.UnsupportedCharsetException;
|
||||
import java.nio.charset.spi.CharsetProvider;
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
import java.util.SortedMap;
|
||||
|
||||
import com.ibm.icu.charset.*;
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
|
@ -121,6 +128,46 @@ public class TestCharset extends TestFmwk {
|
|||
smBufEncode(encoder, "UTF-16", us, newBS);
|
||||
}
|
||||
|
||||
}
|
||||
/**
 * Exercises the ICU "ASCII" charset with every 7-bit value: builds matching
 * char and byte buffers holding 0x00..0x7F and hands them to the
 * smBufDecode/smBufEncode helpers.
 * NOTE(review): those helpers are defined outside this hunk; presumably they
 * convert one buffer and compare against the other -- confirm.
 */
public void TestASCIIConverter(){
|
||||
CharsetProvider icu = new CharsetProviderICU();
|
||||
// NOTE(review): the result is used without a null check; if the provider can
// return null for this name, the next line throws NullPointerException.
Charset icuChar = icu.charsetForName("ASCII");
|
||||
CharsetEncoder encoder = icuChar.newEncoder();
|
||||
CharsetDecoder decoder = icuChar.newDecoder();
|
||||
|
||||
// Capacity 0x90 is larger than the 0x80 elements written below.
CharBuffer us = CharBuffer.allocate(0x90);
|
||||
ByteBuffer bs = ByteBuffer.allocate(0x90);
|
||||
// Fill both buffers with the same values 0x00..0x7F, one element per value.
for(int j=0;j<=0x7f; j++){
|
||||
us.put((char)j);
|
||||
bs.put((byte)j);
|
||||
}
|
||||
// Manual flip: limit = number of elements written, position = 0.
bs.limit(bs.position());
|
||||
bs.position(0);
|
||||
us.limit(us.position());
|
||||
us.position(0);
|
||||
// Decode direction first (bytes -> chars), then encode (chars -> bytes).
smBufDecode(decoder, "ASCII", bs, us);
|
||||
smBufEncode(encoder, "ASCII", us, bs);
|
||||
|
||||
}
|
||||
/**
 * Exercises the ICU "iso-8859-1" charset with every 8-bit value: builds
 * matching char and byte buffers holding 0x00..0xFF and hands them to the
 * smBufDecode/smBufEncode helpers.
 * NOTE(review): those helpers are defined outside this hunk; presumably they
 * convert one buffer and compare against the other -- confirm.
 */
public void Test88591Converter(){
|
||||
CharsetProvider icu = new CharsetProviderICU();
|
||||
// NOTE(review): the result is used without a null check; if the provider can
// return null for this name, the next line throws NullPointerException.
Charset icuChar = icu.charsetForName("iso-8859-1");
|
||||
CharsetEncoder encoder = icuChar.newEncoder();
|
||||
CharsetDecoder decoder = icuChar.newDecoder();
|
||||
|
||||
// Capacity 0x100 matches exactly the 256 elements written below.
CharBuffer us = CharBuffer.allocate(0x100);
|
||||
ByteBuffer bs = ByteBuffer.allocate(0x100);
|
||||
// Fill both buffers with the values 0x00..0xFF; the (byte) cast wraps values
// above 0x7F to negative bytes with the same bit pattern.
for(int j=0;j<=0xFf; j++){
|
||||
us.put((char)j);
|
||||
bs.put((byte)j);
|
||||
}
|
||||
// Manual flip: limit = number of elements written, position = 0.
bs.limit(bs.position());
|
||||
bs.position(0);
|
||||
us.limit(us.position());
|
||||
us.position(0);
|
||||
// Decode direction first (bytes -> chars), then encode (chars -> bytes).
smBufDecode(decoder, "iso-8859-1", bs, us);
|
||||
smBufEncode(encoder, "iso-8859-1", us, bs);
|
||||
|
||||
}
|
||||
|
||||
public void TestAPISemantics(/*String encoding*/)
|
||||
|
@ -526,11 +573,17 @@ public class TestCharset extends TestFmwk {
|
|||
Charset cs = icu.charsetForName((String)charsets[i]);
|
||||
try{
|
||||
CharsetEncoder encoder = cs.newEncoder();
|
||||
if(encoder!=null){
|
||||
logln("Creation of encoder succeeded. "+cs.toString());
|
||||
}
|
||||
}catch(Exception ex){
|
||||
errln("Could not instantiate encoder for "+charsets[i]+". Error: "+ex.toString());
|
||||
}
|
||||
try{
|
||||
CharsetDecoder decoder = cs.newDecoder();
|
||||
if(decoder!=null){
|
||||
logln("Creation of decoder succeeded. "+cs.toString());
|
||||
}
|
||||
}catch(Exception ex){
|
||||
errln("Could not instantiate decoder for "+charsets[i]+". Error: "+ex.toString());
|
||||
}
|
||||
|
@ -598,6 +651,7 @@ public class TestCharset extends TestFmwk {
|
|||
errln("Unexpected exception: "+ex.toString());
|
||||
}
|
||||
}
|
||||
/*
|
||||
public void TestImplFlushFailure(){
|
||||
|
||||
try{
|
||||
|
@ -614,17 +668,21 @@ public class TestCharset extends TestFmwk {
|
|||
errln("Could not create encoder for iso-2022-jp exception: "+ex.toString());
|
||||
}
|
||||
}
|
||||
|
||||
*/
|
||||
public void TestISO88591() {
|
||||
/*
|
||||
|
||||
Charset cs = new CharsetProviderICU().charsetForName("iso-8859-1");
|
||||
CharsetEncoder encoder = cs.newEncoder();
|
||||
if(encoder!=null){
|
||||
encoder.canEncode("\uc2a3");
|
||||
if(cs!=null){
|
||||
CharsetEncoder encoder = cs.newEncoder();
|
||||
if(encoder!=null){
|
||||
encoder.canEncode("\uc2a3");
|
||||
}else{
|
||||
errln("Could not create encoder for iso-8859-1");
|
||||
}
|
||||
}else{
|
||||
errln("Could not create encoder for iso-8859-1");
|
||||
errln("Could not create Charset for iso-8859-1");
|
||||
}
|
||||
*/
|
||||
|
||||
}
|
||||
public void TestUTF8Encode() {
|
||||
CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("utf-8").newEncoder();
|
||||
|
@ -708,7 +766,7 @@ public class TestCharset extends TestFmwk {
|
|||
myTarget.limit(target.limit());
|
||||
mySource.limit(source.limit());
|
||||
mySource.position(source.position());
|
||||
int inputLen = mySource.limit();
|
||||
int inputLen = mySource.remaining();
|
||||
|
||||
CoderResult result = CoderResult.UNDERFLOW;
|
||||
for(int i=1; i<=inputLen; i++) {
|
||||
|
@ -1098,12 +1156,16 @@ public class TestCharset extends TestFmwk {
|
|||
ByteBuffer out = ByteBuffer.allocate(6);
|
||||
encoderICU.onUnmappableCharacter(CodingErrorAction.REPLACE);
|
||||
CoderResult result = encoderICU.encode(CharBuffer.wrap("\u0131\u0061\u00a1"), out, true);
|
||||
byte[] expected = {(byte)0xA9, (byte)0xA5, (byte)0xAF, (byte)0xFE, (byte)0xA2, (byte)0xAE};
|
||||
if(!equals(expected, out.array())){
|
||||
errln("Did not get the expected result for substitution bytes. Got: "+
|
||||
hex(out.array()));
|
||||
if(!result.isError()){
|
||||
byte[] expected = {(byte)0xA9, (byte)0xA5, (byte)0xAF, (byte)0xFE, (byte)0xA2, (byte)0xAE};
|
||||
if(!equals(expected, out.array())){
|
||||
errln("Did not get the expected result for substitution bytes. Got: "+
|
||||
hex(out.array()));
|
||||
}
|
||||
logln("Output: "+ hex(out.array()));
|
||||
}else{
|
||||
errln("Encode operation failed for encoder: "+encoderICU.toString());
|
||||
}
|
||||
logln("Output: "+ hex(out.array()));
|
||||
}
|
||||
{
|
||||
// Decoder: to Unicode conversion
|
||||
|
@ -1111,12 +1173,16 @@ public class TestCharset extends TestFmwk {
|
|||
CharBuffer out = CharBuffer.allocate(3);
|
||||
decoderICU.onMalformedInput(CodingErrorAction.REPLACE);
|
||||
CoderResult result = decoderICU.decode(ByteBuffer.wrap(new byte[] { (byte)0xA2, (byte)0xAE, (byte)0x12, (byte)0x34, (byte)0xEF, (byte)0xDC }), out, true);
|
||||
char[] expected = {'\u00a1', '\ufffd', '\u6676'};
|
||||
if(!equals(expected, out.array())){
|
||||
errln("Did not get the expected result for substitution chars. Got: "+
|
||||
hex(out.array()));
|
||||
if(!result.isError()){
|
||||
char[] expected = {'\u00a1', '\ufffd', '\u6676'};
|
||||
if(!equals(expected, out.array())){
|
||||
errln("Did not get the expected result for substitution chars. Got: "+
|
||||
hex(out.array()));
|
||||
}
|
||||
logln("Output: "+ hex(out.array()));
|
||||
}else{
|
||||
errln("Decode operation failed for encoder: "+decoderICU.toString());
|
||||
}
|
||||
logln("Output: "+ hex(out.array()));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1271,7 +1337,11 @@ public class TestCharset extends TestFmwk {
|
|||
decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
|
||||
try{
|
||||
CharBuffer out = decoder.decode(in);
|
||||
if(out!=null){
|
||||
logln(cs.toString()+" encoing succeeded as expected!");
|
||||
}
|
||||
}catch ( Exception ex){
|
||||
errln("Did not get expected exception for encoding: "+cs.toString());
|
||||
return ex;
|
||||
}
|
||||
return null;
|
||||
|
|
|
@ -874,7 +874,7 @@ public final class UTF16Test extends TestFmwk
|
|||
*/
|
||||
public void TestValueOf()
|
||||
{
|
||||
if(UTF16.getCodePoint('\ud800','\udc00')!=0x10000){
|
||||
if(UCharacter.getCodePoint('\ud800','\udc00')!=0x10000){
|
||||
errln("FAIL: getCodePoint('\ud800','\udc00')");
|
||||
}
|
||||
if (!UTF16.valueOf(0x61).equals("a") ||
|
||||
|
|
|
@ -791,15 +791,8 @@ public class CharsetMBCS extends CharsetICU {
|
|||
|
||||
protected static int GET_MAX_BYTES_PER_UCHAR(ByteBuffer indexes)
|
||||
{
|
||||
indexes.position(0);
|
||||
IntBuffer a = indexes.asIntBuffer();
|
||||
int n;
|
||||
if(a.hasArray())
|
||||
n = a.array()[EXT_COUNT_BYTES];
|
||||
else
|
||||
n = a.get(EXT_COUNT_BYTES);
|
||||
|
||||
return indexes.getInt(4*n)&0xff;
|
||||
indexes.position(0);
|
||||
return indexes.getInt(EXT_COUNT_BYTES)&0xff;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -293,8 +293,7 @@ public class CharsetUTF8 extends CharsetICU {
|
|||
|
||||
try{
|
||||
|
||||
if (fromUChar32 != 0 && target.hasRemaining())
|
||||
{
|
||||
if (fromUChar32 != 0 && target.hasRemaining()){
|
||||
ch = fromUChar32;
|
||||
fromUChar32 = 0;
|
||||
|
||||
|
@ -319,13 +318,10 @@ public class CharsetUTF8 extends CharsetICU {
|
|||
doloop = false;
|
||||
}
|
||||
|
||||
if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE)
|
||||
{
|
||||
if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE){
|
||||
indexToWrite = 2;
|
||||
temp[2] = (byte) ((ch >>> 12) | 0xe0);
|
||||
}
|
||||
else
|
||||
{
|
||||
}else{
|
||||
indexToWrite = 3;
|
||||
temp[3] = (byte) ((ch >>> 18) | 0xf0);
|
||||
temp[2] = (byte) (((ch >>> 12) & 0x3f) | 0x80);
|
||||
|
@ -333,14 +329,10 @@ public class CharsetUTF8 extends CharsetICU {
|
|||
temp[1] = (byte) (((ch >>> 6) & 0x3f) | 0x80);
|
||||
temp[0] = (byte) ((ch & 0x3f) | 0x80);
|
||||
|
||||
for (; indexToWrite >= 0; indexToWrite--)
|
||||
{
|
||||
if (target.hasRemaining())
|
||||
{
|
||||
for (; indexToWrite >= 0; indexToWrite--){
|
||||
if (target.hasRemaining()){
|
||||
target.put(temp[indexToWrite]);
|
||||
}
|
||||
else
|
||||
{
|
||||
}else{
|
||||
errorBuffer[errorBufferLength++] = temp[indexToWrite];
|
||||
cr = CoderResult.OVERFLOW;
|
||||
}
|
||||
|
@ -348,31 +340,20 @@ public class CharsetUTF8 extends CharsetICU {
|
|||
}
|
||||
|
||||
if(doloop) {
|
||||
while (sourceArrayIndex < source.limit() && target.hasRemaining())
|
||||
{
|
||||
while (sourceArrayIndex < source.limit() && target.hasRemaining()){
|
||||
ch = source.get(sourceArrayIndex++);
|
||||
|
||||
if (ch < 0x80) /* Single byte */
|
||||
{
|
||||
if (ch < 0x80){ /* Single byte */
|
||||
target.put((byte)ch);
|
||||
}
|
||||
else if (ch < 0x800) /* Double byte */
|
||||
{
|
||||
}else if (ch < 0x800) { /* Double byte */
|
||||
target.put((byte) ((ch >>> 6) | 0xc0));
|
||||
if (target.hasRemaining())
|
||||
{
|
||||
if (target.hasRemaining()){
|
||||
target.put((byte) ((ch & 0x3f) | 0x80));
|
||||
}
|
||||
else
|
||||
{
|
||||
}else{
|
||||
errorBuffer[0] = (byte) ((ch & 0x3f) | 0x80);
|
||||
errorBufferLength = 1;
|
||||
throw new BufferOverflowException();
|
||||
}
|
||||
}
|
||||
else
|
||||
/* Check for surrogates */
|
||||
{
|
||||
}else{ /* Check for surrogates */
|
||||
if(UTF16.isSurrogate((char)ch) && !isCESU8) {
|
||||
if(UTF16.isLeadSurrogate((char)ch)) {
|
||||
|
||||
|
@ -437,8 +418,7 @@ public class CharsetUTF8 extends CharsetICU {
|
|||
}
|
||||
}
|
||||
|
||||
if (sourceArrayIndex < source.limit() && !target.hasRemaining())
|
||||
{
|
||||
if (sourceArrayIndex < source.limit() && !target.hasRemaining()){
|
||||
cr = CoderResult.OVERFLOW;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue