From 74a0af671da3f16b3b373e8ca7b24a4b79eb0807 Mon Sep 17 00:00:00 2001 From: Ram Viswanadha Date: Fri, 1 Sep 2006 23:41:53 +0000 Subject: [PATCH] ICU-5018 fix all failures X-SVN-Rev: 20232 --- .../com/ibm/icu/charset/CharsetCallback.java | 68 +++++++++- .../ibm/icu/charset/CharsetDecoderICU.java | 12 +- .../ibm/icu/charset/CharsetEncoderICU.java | 20 +-- icu4j/src/com/ibm/icu/charset/CharsetICU.java | 61 ++++++--- .../ibm/icu/charset/CharsetProviderICU.java | 30 +++-- .../ibm/icu/dev/test/charset/TestCharset.java | 118 ++++++++++++++---- .../com/ibm/icu/dev/test/lang/UTF16Test.java | 2 +- icu4j/src/com/ibm/icu/impl/CharsetMBCS.java | 11 +- icu4j/src/com/ibm/icu/impl/CharsetUTF8.java | 46 ++----- 9 files changed, 261 insertions(+), 107 deletions(-) diff --git a/icu4j/src/com/ibm/icu/charset/CharsetCallback.java b/icu4j/src/com/ibm/icu/charset/CharsetCallback.java index 9a2d14bf9e2..e9785805825 100644 --- a/icu4j/src/com/ibm/icu/charset/CharsetCallback.java +++ b/icu4j/src/com/ibm/icu/charset/CharsetCallback.java @@ -14,17 +14,37 @@ import java.nio.CharBuffer; import java.nio.IntBuffer; import java.nio.charset.CoderResult; +/** + *

Callback API for CharsetICU

+ * + * CharsetCallback class defines some error behaviour functions called + * by CharsetDecoderICU and CharsetEncoderICU. The class also provides + * the facility by which clients can write their own callbacks. + * + * These functions, although public, should NEVER be called directly. + * They should be used as parameters to the onUmappableCharacter() and + * onMalformedInput() methods, to set the behaviour of a converter + * when it encounters UNMAPPED/INVALID sequences. + * Currently the only way to set callbacks is by using CodingErrorAction. + * In the future we will provide set methods on CharsetEncoder and CharsetDecoder + * that will accept CharsetCallback fields. + * + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ /*public*/ class CharsetCallback { /** * FROM_U, TO_U context options for sub callback * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. */ /*public*/ static final String SUB_STOP_ON_ILLEGAL = "i"; /** * FROM_U, TO_U context options for skip callback * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. */ /*public*/ static final String SKIP_STOP_ON_ILLEGAL = "i"; @@ -42,37 +62,55 @@ import java.nio.charset.CoderResult; * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX) * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\\xXXXX) * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. */ /*public*/ static final String ESCAPE_C = "C"; /** * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. 
*/ /*public*/ static final String ESCAPE_XML_DEC = "D"; /** * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. */ /*public*/ static final String ESCAPE_XML_HEX = "X"; /** * FROM_U_CALLBACK_ESCAPE context option to escape teh code unit according to Unicode (U+XXXXX) * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. */ /*public*/ static final String ESCAPE_UNICODE = "U"; + /** + * Decoder Callback interface + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ public interface Decoder { public CoderResult call(CharsetDecoderICU decoder, Object context, ByteBuffer source, CharBuffer target, IntBuffer offsets, char[] buffer, int length, CoderResult cr); } - + /** + * Encoder Callback interface + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ public interface Encoder { public CoderResult call(CharsetEncoderICU encoder, Object context, CharBuffer source, ByteBuffer target, IntBuffer offsets, char[] buffer, int length, int cp, CoderResult cr); } + /** + * Skip callback + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ public static final Encoder FROM_U_CALLBACK_SKIP = new Encoder() { public CoderResult call(CharsetEncoderICU encoder, Object context, CharBuffer source, ByteBuffer target, IntBuffer offsets, @@ -89,6 +127,11 @@ import java.nio.charset.CoderResult; return cr; } }; + /** + * Skip callback + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. 
+ */ public static final Decoder TO_U_CALLBACK_SKIP = new Decoder() { public CoderResult call(CharsetDecoderICU decoder, Object context, ByteBuffer source, CharBuffer target, IntBuffer offsets, @@ -105,7 +148,11 @@ import java.nio.charset.CoderResult; return cr; } }; - + /** + * Substitute callback + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ public static final Encoder FROM_U_CALLBACK_SUBSTITUTE = new Encoder(){ public CoderResult call(CharsetEncoderICU encoder, Object context, CharBuffer source, ByteBuffer target, IntBuffer offsets, @@ -122,7 +169,11 @@ import java.nio.charset.CoderResult; return cr; } }; - + /** + * Substitute callback + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ public static final Decoder TO_U_CALLBACK_SUBSTITUTE = new Decoder() { public CoderResult call(CharsetDecoderICU decoder, Object context, ByteBuffer source, CharBuffer target, IntBuffer offsets, @@ -140,7 +191,11 @@ import java.nio.charset.CoderResult; return cr; } }; - + /** + * Stop callback + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ public static final Encoder FROM_U_CALLBACK_STOP = new Encoder() { public CoderResult call(CharsetEncoderICU encoder, Object context, CharBuffer source, ByteBuffer target, IntBuffer offsets, @@ -148,6 +203,11 @@ import java.nio.charset.CoderResult; return cr; } }; + /** + * Stop callback + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. 
+ */ public static final Decoder TO_U_CALLBACK_STOP = new Decoder() { public CoderResult call(CharsetDecoderICU decoder, Object context, ByteBuffer source, CharBuffer target, IntBuffer offsets, diff --git a/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java b/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java index 09f791f9021..1e04e23414e 100644 --- a/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java +++ b/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java @@ -18,8 +18,16 @@ import java.nio.charset.CodingErrorAction; import java.nio.charset.MalformedInputException; import java.nio.ByteBuffer; +import com.ibm.icu.charset.CharsetCallback; import com.ibm.icu.impl.Assert; +/** + * An abstract class that provides framework implementation for concrete sub class implementations + * to utilize. In the future this class will contain API that will implement converter sematics of ICU4C. + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ + public abstract class CharsetDecoderICU extends CharsetDecoder{ protected int toUnicodeStatus; @@ -156,6 +164,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ * @param in * @param out * @return + * @draft ICU 3.6 */ protected abstract CoderResult decodeLoop(ByteBuffer in, CharBuffer out, IntBuffer offsets); @@ -167,6 +176,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ * @param flush * @return * @throws MalformedInputException + * @draft ICU 3.6 */ protected final CoderResult decode(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { @@ -551,7 +561,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ * @param status ICU error code in/out parameter. * Must fulfill U_SUCCESS before the function call. * @return The number of chars in the state. -1 if an error is encountered. 
- * @draft ICU 3.4 + * @draft ICU 3.6 */ /*public*/ int toUCountPending() { if(preToULength > 0){ diff --git a/icu4j/src/com/ibm/icu/charset/CharsetEncoderICU.java b/icu4j/src/com/ibm/icu/charset/CharsetEncoderICU.java index ab26d564191..0c4fe4a6bc8 100644 --- a/icu4j/src/com/ibm/icu/charset/CharsetEncoderICU.java +++ b/icu4j/src/com/ibm/icu/charset/CharsetEncoderICU.java @@ -21,7 +21,12 @@ import java.nio.charset.MalformedInputException; import com.ibm.icu.impl.Assert; import com.ibm.icu.text.UTF16; - +/** + * An abstract class that provides framework implementation for concrete sub class implementations + * to utilize. In the future this class will contain API that will implement converter sematics of ICU4C. + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ public abstract class CharsetEncoderICU extends CharsetEncoder { protected byte[] errorBuffer = new byte[30]; @@ -522,18 +527,13 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { public boolean canEncode(int codepoint) { return true; } - + /** + * Overrides super class method + * @stable ICU 3.6 + */ public boolean isLegalReplacement(byte[] repl){ return true; } - - /** - * Releases the system resources by cleanly closing ICU converter opened - * @exception Throwable exception thrown by super class' finalize method - * @draft ICU 3.6 - */ - protected void finalize() throws Throwable { - } protected static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv, byte[] bytesArray, int bytesBegin, int bytesLength, diff --git a/icu4j/src/com/ibm/icu/charset/CharsetICU.java b/icu4j/src/com/ibm/icu/charset/CharsetICU.java index 400101db16c..4c8cfb98a27 100644 --- a/icu4j/src/com/ibm/icu/charset/CharsetICU.java +++ b/icu4j/src/com/ibm/icu/charset/CharsetICU.java @@ -12,6 +12,7 @@ package com.ibm.icu.charset; import java.io.ByteArrayInputStream; import java.io.InputStreamReader; import java.lang.reflect.Constructor; + import 
java.lang.reflect.InvocationTargetException; import java.nio.charset.Charset; import java.nio.charset.IllegalCharsetNameException; @@ -19,30 +20,56 @@ import java.nio.charset.UnsupportedCharsetException; import java.util.HashMap; import com.ibm.icu.lang.UCharacter; - - - +/** + *

This API is used to convert codepage or character encoded data to and + * from UTF-16. You can open a converter with {@link Charset#forName(String)} and {@link #forNameICU(String)}. With that + * converter, you can get its properties, set options, and convert your data.

+ * + *

Since many software programs recogize different converter names for + * different types of converters, there are other functions in this API to + * iterate over the converter aliases. + * + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + */ public abstract class CharsetICU extends Charset{ protected String icuCanonicalName; protected String javaCanonicalName; protected int options; - protected int maxBytesPerChar; - protected int minBytesPerChar; protected float maxCharsPerByte; - protected byte subChar1 = 0x00; protected int mode; protected boolean flush; protected boolean useFallback; + protected String name; /* +4: 60 internal name of the converter- invariant chars */ + + protected int codepage; /* +64: 4 codepage # (now IBM-$codepage) */ + + protected byte platform; /* +68: 1 platform of the converter (only IBM now) */ + protected byte conversionType; /* +69: 1 conversion type */ + + protected int minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */ + protected int maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */ + + protected byte subChar[/*UCNV_MAX_SUBCHAR_LEN*/]; /* +72: 4 [note: 4 and 8 byte boundary] */ + protected byte subCharLen; /* +76: 1 */ + + protected byte hasToUnicodeFallback; /* +77: 1 UBool needs to be changed to UBool to be consistent across platform */ + protected byte hasFromUnicodeFallback; /* +78: 1 */ + protected short unicodeMask; /* +79: 1 bit 0: has supplementary bit 1: has single surrogates */ + protected byte subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */ + protected byte reserved[/*19*/]; /* +81: 19 to round out the structure */ + /** * * @param icuCanonicalName * @param canonName * @param aliases * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. 
*/ protected CharsetICU(String icuCanonicalName, String canonicalName, String[] aliases) { super(canonicalName,aliases); @@ -55,8 +82,10 @@ public abstract class CharsetICU extends Charset{ /** * Ascertains if a charset is a sub set of this charset + * Implements the abstract method of super class. * @param cs charset to test * @return true if the given charset is a subset of this charset + * @stable ICU 3.6 */ public boolean contains(Charset cs){ if (null == cs) { @@ -68,14 +97,13 @@ public abstract class CharsetICU extends Charset{ } private static final HashMap algorithmicCharsets = new HashMap(); static{ - algorithmicCharsets.put("BOCU-1", "com.ibm.icu.impl.CharsetBOCU1" ); + /*algorithmicCharsets.put("BOCU-1", "com.ibm.icu.impl.CharsetBOCU1" ); algorithmicCharsets.put("CESU-8", "com.ibm.icu.impl.CharsetCESU8" ); algorithmicCharsets.put("HZ", "com.ibm.icu.impl.CharsetHZ" ); algorithmicCharsets.put("imapmailboxname", "com.ibm.icu.impl.CharsetIMAP" ); algorithmicCharsets.put("ISCII", "com.ibm.icu.impl.CharsetISCII" ); - algorithmicCharsets.put("iso2022", "com.ibm.icu.impl.CharsetISO2022" ); - algorithmicCharsets.put("iso88591", "com.ibm.icu.impl.CharsetBOCU1" ); - algorithmicCharsets.put("lmbcs1", "com.ibm.icu.impl.CharsetLMBCS1" ); + algorithmicCharsets.put("iso2022", "com.ibm.icu.impl.CharsetISO2022" );*/ + /*algorithmicCharsets.put("lmbcs1", "com.ibm.icu.impl.CharsetLMBCS1" ); algorithmicCharsets.put("lmbcs11", "com.ibm.icu.impl.CharsetLMBCS11" ); algorithmicCharsets.put("lmbcs16", "com.ibm.icu.impl.CharsetLMBCS16" ); algorithmicCharsets.put("lmbcs17", "com.ibm.icu.impl.CharsetLMBCS17" ); @@ -86,9 +114,10 @@ public abstract class CharsetICU extends Charset{ algorithmicCharsets.put("lmbcs4", "com.ibm.icu.impl.CharsetLMBCS4" ); algorithmicCharsets.put("lmbcs5", "com.ibm.icu.impl.CharsetLMBCS5" ); algorithmicCharsets.put("lmbcs6", "com.ibm.icu.impl.CharsetLMBCS6" ); - algorithmicCharsets.put("lmbcs8", "com.ibm.icu.impl.CharsetLMBCS8" ); - 
algorithmicCharsets.put("scsu", "com.ibm.icu.impl.CharsetSCSU" ); - algorithmicCharsets.put("usascii", "com.ibm.icu.impl.CharsetUSASCII" ); + algorithmicCharsets.put("lmbcs8", "com.ibm.icu.impl.CharsetLMBCS8" ) + algorithmicCharsets.put("scsu", "com.ibm.icu.impl.CharsetSCSU" ); */ + algorithmicCharsets.put("US-ASCII", "com.ibm.icu.impl.CharsetASCII" ); + algorithmicCharsets.put("ISO-8859-1", "com.ibm.icu.impl.Charset88591" ); algorithmicCharsets.put("UTF-16", "com.ibm.icu.impl.CharsetUTF16" ); algorithmicCharsets.put("UTF-16BE", "com.ibm.icu.impl.CharsetUTF16" ); algorithmicCharsets.put("UTF-16LE", "com.ibm.icu.impl.CharsetUTF16LE" ); @@ -153,7 +182,9 @@ public abstract class CharsetICU extends Charset{ } /** - * + * Returns the default charset name + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. */ public static final String getDefaultCharsetName(){ String defaultEncoding = new InputStreamReader(new ByteArrayInputStream(new byte[0])).getEncoding(); @@ -179,6 +210,8 @@ public abstract class CharsetICU extends Charset{ * @throws UnsupportedCharsetException If no support for the * named charset is available in this instance of th Java * virtual machine + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. */ public static Charset forNameICU(String charsetName) throws IllegalCharsetNameException, UnsupportedCharsetException { CharsetProviderICU icuProvider = new CharsetProviderICU(); diff --git a/icu4j/src/com/ibm/icu/charset/CharsetProviderICU.java b/icu4j/src/com/ibm/icu/charset/CharsetProviderICU.java index 8ae7d1bdb33..35c0071fa54 100644 --- a/icu4j/src/com/ibm/icu/charset/CharsetProviderICU.java +++ b/icu4j/src/com/ibm/icu/charset/CharsetProviderICU.java @@ -20,20 +20,21 @@ import java.util.NoSuchElementException; import com.ibm.icu.impl.UConverterAlias; +/** + * A concrete subclass of CharsetProvider for loading and providing charset converters + * in ICU. 
+ * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. + * + */ public final class CharsetProviderICU extends CharsetProvider{ /** - * Constructs a CharsetProviderICU object - * @stable ICU 2.4 - */ - public CharsetProviderICU(){ - } - - /** - * Constructs a charset for the given charset name + * Constructs a charset for the given charset name. + * Implements the abstract method of super class. * @param charsetName charset name * @return charset objet for the given charset name, null if unsupported - * @stable ICU 2.4 + * @stable ICU 3.6 */ public final Charset charsetForName(String charsetName){ try{ @@ -57,6 +58,7 @@ public final class CharsetProviderICU extends CharsetProvider{ * @param enc converter name * @return canonical name of the converter * @internal ICU 3.4 + * @deprecated This API is for internal ICU use only */ public static final String getICUCanonicalName(String enc) throws UnsupportedCharsetException{ @@ -228,8 +230,10 @@ public final class CharsetProviderICU extends CharsetProvider{ } /** - * Returns an iterator for the available charsets + * Returns an iterator for the available charsets. + * Implements the abstract method of super class. * @return Iterator the charset name iterator + * @stable ICU 3.6 */ public final Iterator charsets(){ HashMap map = new HashMap(); @@ -239,6 +243,8 @@ public final class CharsetProviderICU extends CharsetProvider{ /** * Gets the canonical names of available converters * @return Object[] names as an object array + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. */ public static final Object[] getAvailableNames(){ HashMap map = new HashMap(); @@ -247,7 +253,9 @@ public final class CharsetProviderICU extends CharsetProvider{ } /** * Return all names available - * @return + * @return String[] an arrya of all available names + * @draft ICU 3.6 + * @provisional This API might change or be removed in a future release. 
*/ public static final String[] getAllNames(){ int num = UConverterAlias.countAvailable(); diff --git a/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java b/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java index 1b032a90fda..8007b59c5b4 100644 --- a/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java +++ b/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java @@ -9,12 +9,19 @@ package com.ibm.icu.dev.test.charset; -import java.nio.*; -import java.nio.charset.spi.*; -import java.nio.charset.*; -import java.util.*; - -//import sun.misc.ASCIICaseInsensitiveComparator; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.UnsupportedCharsetException; +import java.nio.charset.spi.CharsetProvider; +import java.util.Iterator; +import java.util.Set; +import java.util.SortedMap; import com.ibm.icu.charset.*; import com.ibm.icu.dev.test.TestFmwk; @@ -121,6 +128,46 @@ public class TestCharset extends TestFmwk { smBufEncode(encoder, "UTF-16", us, newBS); } + } + public void TestASCIIConverter(){ + CharsetProvider icu = new CharsetProviderICU(); + Charset icuChar = icu.charsetForName("ASCII"); + CharsetEncoder encoder = icuChar.newEncoder(); + CharsetDecoder decoder = icuChar.newDecoder(); + + CharBuffer us = CharBuffer.allocate(0x90); + ByteBuffer bs = ByteBuffer.allocate(0x90); + for(int j=0;j<=0x7f; j++){ + us.put((char)j); + bs.put((byte)j); + } + bs.limit(bs.position()); + bs.position(0); + us.limit(us.position()); + us.position(0); + smBufDecode(decoder, "ASCII", bs, us); + smBufEncode(encoder, "ASCII", us, bs); + + } + public void Test88591Converter(){ + CharsetProvider icu = new CharsetProviderICU(); + Charset icuChar = icu.charsetForName("iso-8859-1"); + CharsetEncoder encoder = 
icuChar.newEncoder(); + CharsetDecoder decoder = icuChar.newDecoder(); + + CharBuffer us = CharBuffer.allocate(0x100); + ByteBuffer bs = ByteBuffer.allocate(0x100); + for(int j=0;j<=0xFf; j++){ + us.put((char)j); + bs.put((byte)j); + } + bs.limit(bs.position()); + bs.position(0); + us.limit(us.position()); + us.position(0); + smBufDecode(decoder, "iso-8859-1", bs, us); + smBufEncode(encoder, "iso-8859-1", us, bs); + } public void TestAPISemantics(/*String encoding*/) @@ -526,11 +573,17 @@ public class TestCharset extends TestFmwk { Charset cs = icu.charsetForName((String)charsets[i]); try{ CharsetEncoder encoder = cs.newEncoder(); + if(encoder!=null){ + logln("Creation of encoder succeeded. "+cs.toString()); + } }catch(Exception ex){ errln("Could not instantiate encoder for "+charsets[i]+". Error: "+ex.toString()); } try{ CharsetDecoder decoder = cs.newDecoder(); + if(decoder!=null){ + logln("Creation of decoder succeeded. "+cs.toString()); + } }catch(Exception ex){ errln("Could not instantiate decoder for "+charsets[i]+". 
Error: "+ex.toString()); } @@ -598,6 +651,7 @@ public class TestCharset extends TestFmwk { errln("Unexpected exception: "+ex.toString()); } } + /* public void TestImplFlushFailure(){ try{ @@ -614,17 +668,21 @@ public class TestCharset extends TestFmwk { errln("Could not create encoder for iso-2022-jp exception: "+ex.toString()); } } - + */ public void TestISO88591() { - /* + Charset cs = new CharsetProviderICU().charsetForName("iso-8859-1"); - CharsetEncoder encoder = cs.newEncoder(); - if(encoder!=null){ - encoder.canEncode("\uc2a3"); + if(cs!=null){ + CharsetEncoder encoder = cs.newEncoder(); + if(encoder!=null){ + encoder.canEncode("\uc2a3"); + }else{ + errln("Could not create encoder for iso-8859-1"); + } }else{ - errln("Could not create encoder for iso-8859-1"); + errln("Could not create Charset for iso-8859-1"); } - */ + } public void TestUTF8Encode() { CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("utf-8").newEncoder(); @@ -708,7 +766,7 @@ public class TestCharset extends TestFmwk { myTarget.limit(target.limit()); mySource.limit(source.limit()); mySource.position(source.position()); - int inputLen = mySource.limit(); + int inputLen = mySource.remaining(); CoderResult result = CoderResult.UNDERFLOW; for(int i=1; i<=inputLen; i++) { @@ -1098,12 +1156,16 @@ public class TestCharset extends TestFmwk { ByteBuffer out = ByteBuffer.allocate(6); encoderICU.onUnmappableCharacter(CodingErrorAction.REPLACE); CoderResult result = encoderICU.encode(CharBuffer.wrap("\u0131\u0061\u00a1"), out, true); - byte[] expected = {(byte)0xA9, (byte)0xA5, (byte)0xAF, (byte)0xFE, (byte)0xA2, (byte)0xAE}; - if(!equals(expected, out.array())){ - errln("Did not get the expected result for substitution bytes. Got: "+ - hex(out.array())); + if(!result.isError()){ + byte[] expected = {(byte)0xA9, (byte)0xA5, (byte)0xAF, (byte)0xFE, (byte)0xA2, (byte)0xAE}; + if(!equals(expected, out.array())){ + errln("Did not get the expected result for substitution bytes. 
Got: "+ + hex(out.array())); + } + logln("Output: "+ hex(out.array())); + }else{ + errln("Encode operation failed for encoder: "+encoderICU.toString()); } - logln("Output: "+ hex(out.array())); } { // Decoder: to Unicode conversion @@ -1111,12 +1173,16 @@ public class TestCharset extends TestFmwk { CharBuffer out = CharBuffer.allocate(3); decoderICU.onMalformedInput(CodingErrorAction.REPLACE); CoderResult result = decoderICU.decode(ByteBuffer.wrap(new byte[] { (byte)0xA2, (byte)0xAE, (byte)0x12, (byte)0x34, (byte)0xEF, (byte)0xDC }), out, true); - char[] expected = {'\u00a1', '\ufffd', '\u6676'}; - if(!equals(expected, out.array())){ - errln("Did not get the expected result for substitution chars. Got: "+ - hex(out.array())); + if(!result.isError()){ + char[] expected = {'\u00a1', '\ufffd', '\u6676'}; + if(!equals(expected, out.array())){ + errln("Did not get the expected result for substitution chars. Got: "+ + hex(out.array())); + } + logln("Output: "+ hex(out.array())); + }else{ + errln("Decode operation failed for encoder: "+decoderICU.toString()); } - logln("Output: "+ hex(out.array())); } } @@ -1271,7 +1337,11 @@ public class TestCharset extends TestFmwk { decoder.onUnmappableCharacter(CodingErrorAction.REPORT); try{ CharBuffer out = decoder.decode(in); + if(out!=null){ + logln(cs.toString()+" encoing succeeded as expected!"); + } }catch ( Exception ex){ + errln("Did not get expected exception for encoding: "+cs.toString()); return ex; } return null; diff --git a/icu4j/src/com/ibm/icu/dev/test/lang/UTF16Test.java b/icu4j/src/com/ibm/icu/dev/test/lang/UTF16Test.java index 3821e2f7584..17d51bc656b 100755 --- a/icu4j/src/com/ibm/icu/dev/test/lang/UTF16Test.java +++ b/icu4j/src/com/ibm/icu/dev/test/lang/UTF16Test.java @@ -874,7 +874,7 @@ public final class UTF16Test extends TestFmwk */ public void TestValueOf() { - if(UTF16.getCodePoint('\ud800','\udc00')!=0x10000){ + if(UCharacter.getCodePoint('\ud800','\udc00')!=0x10000){ errln("FAIL: 
getCodePoint('\ud800','\udc00')"); } if (!UTF16.valueOf(0x61).equals("a") || diff --git a/icu4j/src/com/ibm/icu/impl/CharsetMBCS.java b/icu4j/src/com/ibm/icu/impl/CharsetMBCS.java index c712ee1b94d..cd12cd3dacf 100644 --- a/icu4j/src/com/ibm/icu/impl/CharsetMBCS.java +++ b/icu4j/src/com/ibm/icu/impl/CharsetMBCS.java @@ -791,15 +791,8 @@ public class CharsetMBCS extends CharsetICU { protected static int GET_MAX_BYTES_PER_UCHAR(ByteBuffer indexes) { - indexes.position(0); - IntBuffer a = indexes.asIntBuffer(); - int n; - if(a.hasArray()) - n = a.array()[EXT_COUNT_BYTES]; - else - n = a.get(EXT_COUNT_BYTES); - - return indexes.getInt(4*n)&0xff; + indexes.position(0); + return indexes.getInt(EXT_COUNT_BYTES)&0xff; } /* diff --git a/icu4j/src/com/ibm/icu/impl/CharsetUTF8.java b/icu4j/src/com/ibm/icu/impl/CharsetUTF8.java index 4e77c987afa..b8f87ebe3cc 100644 --- a/icu4j/src/com/ibm/icu/impl/CharsetUTF8.java +++ b/icu4j/src/com/ibm/icu/impl/CharsetUTF8.java @@ -293,8 +293,7 @@ public class CharsetUTF8 extends CharsetICU { try{ - if (fromUChar32 != 0 && target.hasRemaining()) - { + if (fromUChar32 != 0 && target.hasRemaining()){ ch = fromUChar32; fromUChar32 = 0; @@ -319,13 +318,10 @@ public class CharsetUTF8 extends CharsetICU { doloop = false; } - if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) - { + if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE){ indexToWrite = 2; temp[2] = (byte) ((ch >>> 12) | 0xe0); - } - else - { + }else{ indexToWrite = 3; temp[3] = (byte) ((ch >>> 18) | 0xf0); temp[2] = (byte) (((ch >>> 12) & 0x3f) | 0x80); @@ -333,14 +329,10 @@ public class CharsetUTF8 extends CharsetICU { temp[1] = (byte) (((ch >>> 6) & 0x3f) | 0x80); temp[0] = (byte) ((ch & 0x3f) | 0x80); - for (; indexToWrite >= 0; indexToWrite--) - { - if (target.hasRemaining()) - { + for (; indexToWrite >= 0; indexToWrite--){ + if (target.hasRemaining()){ target.put(temp[indexToWrite]); - } - else - { + }else{ errorBuffer[errorBufferLength++] = temp[indexToWrite]; cr = CoderResult.OVERFLOW; } @@ -348,31 
+340,20 @@ public class CharsetUTF8 extends CharsetICU { } if(doloop) { - while (sourceArrayIndex < source.limit() && target.hasRemaining()) - { + while (sourceArrayIndex < source.limit() && target.hasRemaining()){ ch = source.get(sourceArrayIndex++); - - if (ch < 0x80) /* Single byte */ - { + if (ch < 0x80){ /* Single byte */ target.put((byte)ch); - } - else if (ch < 0x800) /* Double byte */ - { + }else if (ch < 0x800) { /* Double byte */ target.put((byte) ((ch >>> 6) | 0xc0)); - if (target.hasRemaining()) - { + if (target.hasRemaining()){ target.put((byte) ((ch & 0x3f) | 0x80)); - } - else - { + }else{ errorBuffer[0] = (byte) ((ch & 0x3f) | 0x80); errorBufferLength = 1; throw new BufferOverflowException(); } - } - else - /* Check for surrogates */ - { + }else{ /* Check for surrogates */ if(UTF16.isSurrogate((char)ch) && !isCESU8) { if(UTF16.isLeadSurrogate((char)ch)) { @@ -437,8 +418,7 @@ public class CharsetUTF8 extends CharsetICU { } } - if (sourceArrayIndex < source.limit() && !target.hasRemaining()) - { + if (sourceArrayIndex < source.limit() && !target.hasRemaining()){ cr = CoderResult.OVERFLOW; }