diff --git a/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java b/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java index bcd878fcfa6..af26085e6c5 100644 --- a/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java +++ b/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java @@ -699,7 +699,10 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ * @param offsets * @return A CoderResult object that contains the error result when an error occurs. */ - CoderResult cbToUWriteSub(CharsetDecoderICU decoder, + /* Note: Currently, this method is not being used because the callback method calls toUWriteUChars with + * the substitution characters. Will leave in here for the time being. To be removed later. (4.0) + */ + /*CoderResult cbToUWriteSub(CharsetDecoderICU decoder, ByteBuffer source, CharBuffer target, IntBuffer offsets){ String sub = decoder.replacement(); @@ -713,5 +716,5 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ 0, sub.length(), target, offsets, source.position()); } - } + }*/ } diff --git a/icu4j/src/com/ibm/icu/charset/CharsetEncoderICU.java b/icu4j/src/com/ibm/icu/charset/CharsetEncoderICU.java index 8f38ab72ee0..73ce45967f5 100644 --- a/icu4j/src/com/ibm/icu/charset/CharsetEncoderICU.java +++ b/icu4j/src/com/ibm/icu/charset/CharsetEncoderICU.java @@ -258,10 +258,11 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { /* do the conversion */ CoderResult ret = encode(in, out, null, false); setSourcePosition(in); - if (ret.isUnderflow() && in.hasRemaining()) { + /* No need to reset to keep the proper state of the encoder. + if (ret.isUnderflow() && in.hasRemaining()) { // The Java framework is going to substitute what is left. - fromUnicodeReset(); - } + //fromUnicodeReset(); + } */ return ret; } @@ -408,9 +409,9 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR); replayArray.put(preFromUArray, 0, -preFromULength); + source = replayArray; source.position(replayArrayIndex); source.limit(replayArrayIndex - preFromULength); //preFromULength is negative, see declaration - source = replayArray; flush = false; preFromULength = 0; @@ -747,7 +748,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { */ private final void setSourcePosition(CharBuffer source) { - // ok was there input held in the previous invocation of decodeLoop + // ok was there input held in the previous invocation of encodeLoop // that resulted in output in this invocation? source.position(source.position() - fromUCountPending()); } diff --git a/icu4j/src/com/ibm/icu/charset/CharsetMBCS.java b/icu4j/src/com/ibm/icu/charset/CharsetMBCS.java index ecfcece48d2..61f41d6ba7d 100644 --- a/icu4j/src/com/ibm/icu/charset/CharsetMBCS.java +++ b/icu4j/src/com/ibm/icu/charset/CharsetMBCS.java @@ -648,8 +648,8 @@ class CharsetMBCS extends CharsetICU { for (int i = 0; i < stage1.length; ++i) { stage1[i] = (char)(((stage[i*2])<<8)|(stage[i*2+1] & UConverterConstants.UNSIGNED_BYTE_MASK)); } - byte[] stage2 = new byte[fullStage2Length*4]; - System.arraycopy(stage, stage1Length*2, stage2, 0, stage2.length); + byte[] stage2 = new byte[stage.length - ((stage1Length * 2) + (fullStage2Length * 4))]; + System.arraycopy(stage, ((stage1Length * 2) + (fullStage2Length * 4)), stage2, 0, stage2.length); mbcsTable.fromUnicodeTable = stage1; mbcsTable.fromUnicodeBytes = stage2; @@ -2932,12 +2932,13 @@ class CharsetMBCS extends CharsetICU { int prevSourceIndex, sourceIndex, nextSourceIndex; int stage2Entry = 0, value = 0, length = 0, prevLength; short uniMask; - //long asciiRoundtrips; + // long asciiRoundtrips; + boolean utf8Friendly = false, gotoUnassigned = false; try { - if (preFromUFirstCP >= 0) { + if (!flush && preFromUFirstCP >= 0) { /* * pass sourceIndex=-1 because we continue from an earlier buffer in the future, this may change * with continuous offsets @@ -2972,7 +2973,8 @@ class CharsetMBCS extends CharsetICU { } else { bytes = sharedData.mbcs.fromUnicodeBytes; } - //asciiRoundtrips = sharedData.mbcs.asciiRoundtrips; + + // asciiRoundtrips = sharedData.mbcs.asciiRoundtrips; /* get the converter state from UConverter */ c = fromUChar32; diff --git a/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java b/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java index 38b978fb826..6d12b843ae6 100644 --- a/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java +++ b/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java @@ -4990,4 +4990,72 @@ public class TestCharset extends TestFmwk { } } + + // Port over from ICU4C for test conversion tables (mbcs version 5.x) + // Provide better code coverage in CharsetMBCS, CharsetDecoderICU, and CharsetEncoderICU. + public void TestCharsetTestData() { + CoderResult result = CoderResult.UNDERFLOW; + String charsetName = "test4"; + CharsetProvider provider = new CharsetProviderICU(); + Charset charset = ((CharsetProviderICU)provider).charsetForName(charsetName, "../dev/data/testdata"); + CharsetEncoder encoder = charset.newEncoder(); + CharsetDecoder decoder = charset.newDecoder(); + + byte bytearray[] = { + 0x01, 0x02, 0x03, 0x0a, + 0x01, 0x02, 0x03, 0x0b, + 0x01, 0x02, 0x03, 0x0d, + }; + + // set the callback for overflow errors + ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.OVERFLOW, CharsetCallback.TO_U_CALLBACK_STOP, null); + + ByteBuffer bb = ByteBuffer.wrap(bytearray); + CharBuffer cb = CharBuffer.allocate(10); + + bb.limit(4); + cb.limit(1); // Overflow should occur and is expected + result = decoder.decode(bb, cb, false); + if (result.isError()) { + errln("Error occurred while decoding: " + charsetName + " with error: " + result); + } + + bb.limit(8); + result = decoder.decode(bb, cb, false); + if (result.isError()) { + errln("Error occurred while decoding: " + charsetName + " with error: " + result); + } + + bb.limit(12); + result = decoder.decode(bb, cb, true); + if (result.isError()) { + errln("Error occurred while decoding: " + charsetName + " with error: " + result); + } + + char chararray[] = { + 0xDBC4,0xDE34,0xD900,0xDC05,/* \U00101234\U00050005 */ + 0xD940, /* first half of \U00060006 or \U00060007 */ + 0xDC07/* second half of \U00060007 */ + }; + + cb = CharBuffer.wrap(chararray); + bb = ByteBuffer.allocate(10); + + bb.limit(2); + cb.limit(4); + result = encoder.encode(cb, bb, false); + if (result.isError()) { + errln("Error occurred while encoding: " + charsetName + " with error: " + result); + } + cb.limit(5); + result = encoder.encode(cb, bb, false); + if (result.isError()) { + errln("Error occurred while encoding: " + charsetName + " with error: " + result); + } + cb.limit(6); + result = encoder.encode(cb, bb, true); + if (!result.isError()) { + errln("Error should have occurred while encoding: " + charsetName); + } + } }