ICU-6266 Port over a test from ICU4C for the test* charsets to improve code coverage. Fix a minor replayArray issue in CharsetEncoderICU, along with other issues.

X-SVN-Rev: 24095
2025-04-13 08:53:20 +00:00 · 2008-06-02 18:42:08 +00:00 · 2008-06-02 18:42:08 +00:00 · 7606da8922
commit 7606da8922
parent c55369fc82
4 changed files with 86 additions and 12 deletions
--- a/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java
@ -699,7 +699,10 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
     * @param offsets
     * @return A CoderResult object that contains the error result when an error occurs.
     */
-     CoderResult cbToUWriteSub(CharsetDecoderICU decoder, 
+    /* Note: Currently, this method is not being used because the callback method calls toUWriteUChars with
+     * the substitution characters. Will leave in here for the time being. To be removed later. (4.0)
+     */
+     /*CoderResult cbToUWriteSub(CharsetDecoderICU decoder, 
                                        ByteBuffer source, CharBuffer target, 
                                        IntBuffer offsets){
        String sub = decoder.replacement();
@ -713,5 +716,5 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
                    0, sub.length(), target, offsets, source.position());
            
        }
-    }
+    }*/
 }
--- a/icu4j/src/com/ibm/icu/charset/CharsetEncoderICU.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetEncoderICU.java
@ -258,10 +258,11 @@ public abstract class CharsetEncoderICU extends CharsetEncoder {
        /* do the conversion */
        CoderResult ret = encode(in, out, null, false);
        setSourcePosition(in);
-        if (ret.isUnderflow() && in.hasRemaining()) {
+        /* No need to reset to keep the proper state of the encoder.
+         if (ret.isUnderflow() && in.hasRemaining()) {
            // The Java framework is going to substitute what is left.
-            fromUnicodeReset();
-        }
+            //fromUnicodeReset();
+        } */
        return ret;
    }

@ -408,9 +409,9 @@ public abstract class CharsetEncoderICU extends CharsetEncoder {

            //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR);
            replayArray.put(preFromUArray, 0, -preFromULength);
+            source = replayArray;
            source.position(replayArrayIndex);
            source.limit(replayArrayIndex - preFromULength); //preFromULength is negative, see declaration
-            source = replayArray;
            flush = false;

            preFromULength = 0;
@ -747,7 +748,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder {
     */
    private final void setSourcePosition(CharBuffer source) {

-        // ok was there input held in the previous invocation of decodeLoop 
+        // ok was there input held in the previous invocation of encodeLoop 
        // that resulted in output in this invocation?
        source.position(source.position() - fromUCountPending());
    }
--- a/icu4j/src/com/ibm/icu/charset/CharsetMBCS.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetMBCS.java
@ -648,8 +648,8 @@ class CharsetMBCS extends CharsetICU {
        for (int i = 0; i < stage1.length; ++i) {
            stage1[i] = (char)(((stage[i*2])<<8)|(stage[i*2+1] & UConverterConstants.UNSIGNED_BYTE_MASK));
        }
-        byte[] stage2 = new byte[fullStage2Length*4];
-        System.arraycopy(stage, stage1Length*2, stage2, 0, stage2.length);
+        byte[] stage2 = new byte[stage.length - ((stage1Length * 2) + (fullStage2Length * 4))];
+        System.arraycopy(stage, ((stage1Length * 2) + (fullStage2Length * 4)), stage2, 0, stage2.length);
        
        mbcsTable.fromUnicodeTable = stage1;
        mbcsTable.fromUnicodeBytes = stage2;
@ -2932,12 +2932,13 @@ class CharsetMBCS extends CharsetICU {
            int prevSourceIndex, sourceIndex, nextSourceIndex;
            int stage2Entry = 0, value = 0, length = 0, prevLength;
            short uniMask;
-            //long asciiRoundtrips;
+            // long asciiRoundtrips;
+            
            boolean utf8Friendly = false, gotoUnassigned = false;

            try {

-                if (preFromUFirstCP >= 0) {
+                if (!flush && preFromUFirstCP >= 0) {
                    /*
                     * pass sourceIndex=-1 because we continue from an earlier buffer in the future, this may change
                     * with continuous offsets
@ -2972,7 +2973,8 @@ class CharsetMBCS extends CharsetICU {
                } else {
                    bytes = sharedData.mbcs.fromUnicodeBytes;
                }
-                //asciiRoundtrips = sharedData.mbcs.asciiRoundtrips;
+
+                // asciiRoundtrips = sharedData.mbcs.asciiRoundtrips;

                /* get the converter state from UConverter */
                c = fromUChar32;
--- a/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java
+++ b/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java
@ -4990,4 +4990,72 @@ public class TestCharset extends TestFmwk {
        }
        
    }
+    
+    // Ported from ICU4C: drives the "test4" test conversion table (mbcs version 5.x) through small, staged buffers.
+    // Purpose is better code coverage in CharsetMBCS, CharsetDecoderICU, and CharsetEncoderICU; only result.isError() is checked, not the converted output.
+    public void TestCharsetTestData() {
+        CoderResult result = CoderResult.UNDERFLOW; // placeholder; overwritten by every decode/encode call below
+        String charsetName = "test4";
+        CharsetProvider provider = new CharsetProviderICU();
+        Charset charset = ((CharsetProviderICU)provider).charsetForName(charsetName, "../dev/data/testdata"); // second argument is presumably the converter data location -- confirm against CharsetProviderICU
+        CharsetEncoder encoder = charset.newEncoder();
+        CharsetDecoder decoder = charset.newDecoder();
+        
+        byte bytearray[] = { // three 4-byte groups, fed to the decoder one group at a time
+                0x01, 0x02, 0x03, 0x0a,
+                0x01, 0x02, 0x03, 0x0b,
+                0x01, 0x02, 0x03, 0x0d,
+        };
+        
+        // use the STOP to-Unicode callback when the decoder reports OVERFLOW
+        ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.OVERFLOW, CharsetCallback.TO_U_CALLBACK_STOP, null);
+        
+        ByteBuffer bb = ByteBuffer.wrap(bytearray);
+        CharBuffer cb = CharBuffer.allocate(10);
+        
+        bb.limit(4); // step 1: expose only the first 4 input bytes
+        cb.limit(1); // Overflow should occur and is expected (room for a single char only)
+        result = decoder.decode(bb, cb, false);
+        if (result.isError()) { // overflow/underflow are not errors per CoderResult, so an expected overflow passes
+            errln("Error occurred while decoding: " + charsetName + " with error: " + result);
+        }
+        
+        bb.limit(8); // step 2: expose the next 4 bytes; cb keeps its limit of 1
+        result = decoder.decode(bb, cb, false);
+        if (result.isError()) {
+            errln("Error occurred while decoding: " + charsetName + " with error: " + result);
+        }
+        
+        bb.limit(12); // step 3: expose the last 4 bytes
+        result = decoder.decode(bb, cb, true); // endOfInput = true on the final chunk
+        if (result.isError()) {
+            errln("Error occurred while decoding: " + charsetName + " with error: " + result);
+        }
+        
+        char chararray[] = { // UTF-16 input for the encoder, released in three steps below
+                0xDBC4,0xDE34,0xD900,0xDC05,/* \U00101234\U00050005 */
+                0xD940,     /* first half of \U00060006 or \U00060007 */
+                0xDC07/* second half of \U00060007 */
+        };
+        
+        cb = CharBuffer.wrap(chararray);
+        bb = ByteBuffer.allocate(10);
+        
+        bb.limit(2); // only 2 bytes of output space; never raised for the later encode calls
+        cb.limit(4); // step 1: the two complete surrogate pairs
+        result = encoder.encode(cb, bb, false);
+        if (result.isError()) {
+            errln("Error occurred while encoding: " + charsetName + " with error: " + result);
+        }
+        cb.limit(5); // step 2: add the lead surrogate 0xD940 without its trail surrogate
+        result = encoder.encode(cb, bb, false);
+        if (result.isError()) {
+            errln("Error occurred while encoding: " + charsetName + " with error: " + result);
+        }
+        cb.limit(6); // step 3: add the trail surrogate 0xDC07
+        result = encoder.encode(cb, bb, true); // endOfInput = true on the final chunk
+        if (!result.isError()) { // unlike the earlier steps, this final call is expected to report an error
+            errln("Error should have occurred while encoding: " + charsetName);
+        }
+    }
 }