ICU-6266 Remove utf8friendly code in ICU4J sense it is not needed and add additional test to improve code coverage.

X-SVN-Rev: 24100
2025-04-13 08:53:20 +00:00 · 2008-06-03 20:24:33 +00:00 · 2008-06-03 20:24:33 +00:00 · 534b14f417
commit 534b14f417
parent 7606da8922
2 changed files with 311 additions and 550 deletions
--- a/icu4j/src/com/ibm/icu/charset/CharsetMBCS.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetMBCS.java
@ -280,7 +280,8 @@ class CharsetMBCS extends CharsetICU {
            String baseName;

            /* extension-only file, load the base table and set values appropriately */
-            if ((extIndexes = mbcsTable.extIndexes) == null) {
+            extIndexes = mbcsTable.extIndexes;
+            if (extIndexes == null) {
                /* extension-only file without extension */
                throw new InvalidFormatException();
            }
@ -473,9 +474,9 @@ class CharsetMBCS extends CharsetICU {
                     * MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher.
                     * The .cnv file is prebuilt with an additional stage table with indexes to each block.
                     */
-                    ByteBuffer tmpbb = ByteBuffer.wrap(mbcsTable.fromUnicodeBytes);
-                    tmpbb.position(noFromU ? 0 : mbcsTable.fromUBytesLength);
-                    mbcsTable.mbcsIndex = tmpbb.asCharBuffer();
+                    if (noFromU) {
+                        mbcsTable.mbcsIndex = ByteBuffer.wrap(mbcsTable.fromUnicodeBytes).asCharBuffer();
+                    }
                    mbcsTable.maxFastUChar = (char)((header.version[2]<<8) | 0xff);
                }
            }
@ -1488,89 +1489,48 @@ class CharsetMBCS extends CharsetICU {

                if (byteIndex == 0) {
                    /* optimized loop for 1/2-byte input and BMP output */
-                    if (offsets == null) {
-                        do {
-                            entry = stateTable[state][source.get(sourceArrayIndex)
-                                    & UConverterConstants.UNSIGNED_BYTE_MASK];
-                            if (MBCS_ENTRY_IS_TRANSITION(entry)) {
-                                state = (byte) MBCS_ENTRY_TRANSITION_STATE(entry);
-                                offset = MBCS_ENTRY_TRANSITION_OFFSET(entry);
-
+                    // agljport:todo see ucnvmbcs.c for deleted block
+                    do {
+                        entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK];
+                        if (MBCS_ENTRY_IS_TRANSITION(entry)) {
+                            state = (byte) MBCS_ENTRY_TRANSITION_STATE(entry);
+                            offset = MBCS_ENTRY_TRANSITION_OFFSET(entry);
+                            ++sourceArrayIndex;
+                            if (sourceArrayIndex < source.limit()
+                                    && MBCS_ENTRY_IS_FINAL(entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK])
+                                    && MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_VALID_16
+                                    && (c = unicodeCodeUnits[offset + MBCS_ENTRY_FINAL_VALUE_16(entry)]) < 0xfffe) {
                                ++sourceArrayIndex;
-                                if (sourceArrayIndex < source.limit()
-                                        && MBCS_ENTRY_IS_FINAL(entry = stateTable[state][source.get(sourceArrayIndex)
-                                                & UConverterConstants.UNSIGNED_BYTE_MASK])
-                                        && MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_VALID_16
-                                        && (c = unicodeCodeUnits[offset + MBCS_ENTRY_FINAL_VALUE_16(entry)]) < 0xfffe) {
-                                    ++sourceArrayIndex;
-                                    target.put(c);
-                                    state = (byte) MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
-                                    offset = 0;
-                                } else {
-                                    /* set the state and leave the optimized loop */
-                                    bytes[0] = source.get(sourceArrayIndex - 1);
-                                    byteIndex = 1;
-                                    break;
+                                target.put(c);
+                                if (offsets != null) {
+                                    offsets.put(sourceIndex);
+                                    sourceIndex = (nextSourceIndex += 2);
                                }
+                                state = (byte) MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
+                                offset = 0;
                            } else {
-                                if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
-                                    /* output BMP code point */
-                                    ++sourceArrayIndex;
-                                    target.put((char) MBCS_ENTRY_FINAL_VALUE_16(entry));
-                                    state = (byte) MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
-                                } else {
-                                    /* leave the optimized loop */
-                                    break;
-                                }
+                                /* set the state and leave the optimized loop */
+                                ++nextSourceIndex;
+                                bytes[0] = source.get(sourceArrayIndex - 1);
+                                byteIndex = 1;
+                                break;
                            }
-                        } while (sourceArrayIndex < source.limit() && target.hasRemaining());
-                    } else /* offsets!=NULL */{
-                        // agljport:todo see ucnvmbcs.c for deleted block
-                        do {
-                            entry = stateTable[state][source.get(sourceArrayIndex)];
-                            if (MBCS_ENTRY_IS_TRANSITION(entry)) {
-                                state = (byte) MBCS_ENTRY_TRANSITION_STATE(entry);
-                                offset = MBCS_ENTRY_TRANSITION_OFFSET(entry);
-
+                        } else {
+                            if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
+                                /* output BMP code point */
                                ++sourceArrayIndex;
-                                if (sourceArrayIndex < source.limit()
-                                        && MBCS_ENTRY_IS_FINAL(entry = stateTable[state][source.get(sourceArrayIndex)])
-                                        && MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_VALID_16
-                                        && (c = unicodeCodeUnits[offset + MBCS_ENTRY_FINAL_VALUE_16(entry)]) < 0xfffe) {
-
-                                    ++sourceArrayIndex;
-                                    target.put(c);
-                                    if (offsets != null) {
-                                        offsets.put(sourceIndex);
-                                        sourceIndex = (nextSourceIndex += 2);
-                                    }
-                                    state = (byte) MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
-                                    offset = 0;
-                                } else {
-                                    /* set the state and leave the optimized loop */
-                                    ++nextSourceIndex;
-                                    bytes[0] = source.get(sourceArrayIndex - 1);
-                                    byteIndex = 1;
-                                    break;
+                                target.put((char) MBCS_ENTRY_FINAL_VALUE_16(entry));
+                                if (offsets != null) {
+                                    offsets.put(sourceIndex);
+                                    sourceIndex = ++nextSourceIndex;
                                }
+                                state = (byte) MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
                            } else {
-                                if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
-                                    /* output BMP code point */
-                                    ++sourceArrayIndex;
-                                    target.put((char) MBCS_ENTRY_FINAL_VALUE_16(entry));
-                                    if (offsets != null) {
-                                        offsets.put(sourceIndex);
-                                        sourceIndex = ++nextSourceIndex;
-                                    }
-                                    state = (byte) MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
-                                } else {
-                                    /* leave the optimized loop */
-                                    break;
-                                }
+                                /* leave the optimized loop */
+                                break;
                            }
-                        } while (sourceArrayIndex < source.limit() && target.hasRemaining());
-                    }
-
+                        }
+                    } while (sourceArrayIndex < source.limit() && target.hasRemaining());
                    /*
                     * these tests and break statements could be put inside the loop if C had "break outerLoop" like
                     * Java
@ -2177,7 +2137,8 @@ class CharsetMBCS extends CharsetICU {
             * then load the DBCS state from the MBCS data
             * (dbcsOnlyState==0 if it is not a DBCS-only converter)
             */
-            if ((state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&this.mode)) == 0) {
+            state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&this.mode);
+            if (state == 0) {
                state = sharedData.mbcs.dbcsOnlyState;
            }
            
@ -2203,84 +2164,48 @@ class CharsetMBCS extends CharsetICU {
                
                if (byteIndex == 0) {
                    /* optimized loop for 1/2-byte input and BMP output */
-                    if (offsets == null) {
-                        do {
-                            entry = stateTable[state][(short)source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK];
-                            if (MBCS_ENTRY_IS_TRANSITION(entry)) {
-                                state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_TRANSITION_STATE(entry));
-                                offset = MBCS_ENTRY_TRANSITION_OFFSET(entry);
-                                
+                    do {
+                        entry = stateTable[state][(short)source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK];
+                        if (MBCS_ENTRY_IS_TRANSITION(entry)) {
+                            state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_TRANSITION_STATE(entry));
+                            offset = MBCS_ENTRY_TRANSITION_OFFSET(entry);
+                            
+                            source.get();
+                            if (source.hasRemaining() &&
+                                    MBCS_ENTRY_IS_FINAL(entry=stateTable[state][(short)source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK]) &&
+                                    MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_VALID_16 &&
+                                    (c = unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)]) < 0xfffe) {
                                source.get();
-                                if (source.hasRemaining() &&
-                                        MBCS_ENTRY_IS_FINAL(entry=stateTable[state][(short)source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK]) &&
-                                        MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_VALID_16 &&
-                                        (c = unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)]) < 0xfffe) {
-                                    source.get();
-                                    target.put(c);
-                                    state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_FINAL_STATE(entry)); /* typically 0 */
-                                    offset = 0;
-                                } else {
-                                    /* set the state and leave the optimized loop */
-                                    bytes[0] = source.get(source.position()-1);
-                                    byteIndex = 1;
-                                    break;
+                                target.put(c);
+                                if (offsets != null) {
+                                    offsets.put(sourceIndex);
+                                    sourceIndex = (nextSourceIndex + 2);
                                }
+                                state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_FINAL_STATE(entry)); /* typically 0 */
+                                offset = 0;
                            } else {
-                                if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
-                                    /* output BMP code point */
-                                    source.get();
-                                    target.put((char)MBCS_ENTRY_FINAL_VALUE_16(entry));
-                                    state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_FINAL_STATE(entry)); /* typically 0 */
-                                } else {
-                                    /* leave the optimized loop */
-                                    break;
-                                }
+                                /* set the state and leave the optimized loop */
+                                ++nextSourceIndex;
+                                bytes[0] = source.get(source.position()-1);
+                                byteIndex = 1;
+                                break;
                            }
-                        } while (source.hasRemaining() && target.hasRemaining());
-                    } else { /* offsets != null */
-                        do {
-                            entry = stateTable[state][(short)source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK];
-                            if (MBCS_ENTRY_IS_TRANSITION(entry)) {
-                                state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_TRANSITION_STATE(entry));
-                                offset = MBCS_ENTRY_TRANSITION_OFFSET(entry);
-                                
+                        } else {
+                            if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
+                                /* output BMP code point */
                                source.get();
-                                if (source.hasRemaining() &&
-                                        MBCS_ENTRY_IS_FINAL(entry=stateTable[state][(short)source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK]) &&
-                                        MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_VALID_16 &&
-                                        (c = unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)]) < 0xfffe) {
-                                    source.get();
-                                    target.put(c);
-                                    if (offsets != null) {
-                                        offsets.put(sourceIndex);
-                                        sourceIndex = (nextSourceIndex + 2);
-                                    }
-                                    state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_FINAL_STATE(entry)); /* typically 0 */
-                                    offset = 0;
-                                } else {
-                                    /* set the state and leave the optimized loop */
-                                    ++nextSourceIndex;
-                                    bytes[0] = source.get(source.position()-1);
-                                    byteIndex = 1;
-                                    break;
+                                target.put((char)MBCS_ENTRY_FINAL_VALUE_16(entry));
+                                if (offsets != null) {
+                                    offsets.put(sourceIndex);
+                                    sourceIndex = ++nextSourceIndex;
                                }
+                                state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_FINAL_STATE(entry)); /* typically 0 */
                            } else {
-                                if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
-                                    /* output BMP code point */
-                                    source.get();
-                                    target.put((char)MBCS_ENTRY_FINAL_VALUE_16(entry));
-                                    if (offsets != null) {
-                                        offsets.put(sourceIndex);
-                                        sourceIndex = ++nextSourceIndex;
-                                    }
-                                    state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_FINAL_STATE(entry)); /* typically 0 */
-                                } else {
-                                    /* leave the optimized loop */
-                                    break;
-                                }
+                                /* leave the optimized loop */
+                                break;
                            }
-                        } while (source.hasRemaining() && target.hasRemaining());
-                    }
+                        }
+                    } while (source.hasRemaining() && target.hasRemaining());
                    
                    /* these tests and break statements could be put inside the loop
                     * if C had "break outerLoop" like Java
@ -2934,7 +2859,7 @@ class CharsetMBCS extends CharsetICU {
            short uniMask;
            // long asciiRoundtrips;
            
-            boolean utf8Friendly = false, gotoUnassigned = false;
+            boolean gotoUnassigned = false;

            try {

@ -3047,360 +2972,198 @@ class CharsetMBCS extends CharsetICU {
                                ++nextSourceIndex;

                                /*
-                                 * utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX
-                                 * to avoid dealing with surrogates.
-                                 * MBCS_FAST_MAX must be >=0xd7ff.
+                                 * This also tests if the codepage maps single surrogates. If it does, then surrogates
+                                 * are not paired but mapped separately. Note that in this case unmatched surrogates are
+                                 * not detected.
                                 */
-                                if (c <= 0xd7ff && sharedData.mbcs.mbcsIndex != null && sharedData.mbcs.mbcsIndex.hasRemaining()) {
-                                    utf8Friendly = true;
-                                    value = sharedData.mbcs.mbcsIndex.get(c>>6);
-                                    /* get the bytes and the length for the output (copied from below and adapted for utf8Friendly data) */
-                                    /* There are only roundtrips (!=0) and no-mapping (==0) entries. */
-                                    switch (outputType) {
-                                    case MBCS_OUTPUT_2:
-                                        value = bytes[value + (c&0x3f)] << 8 | bytes[value + (c&0x3f) + 1];
-                                        if (value <= 0xff) {
-                                            if (value == 0) {
-                                                gotoUnassigned = true;
-                                            } else {
-                                                length = 1;
-                                            }
-                                        } else {
-                                            length = 2;
+                                if (UTF16.isSurrogate((char) c)
+                                        && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
+                                    if (UTF16.isLeadSurrogate((char) c)) {
+                                        // getTrail:
+                                        SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex,
+                                                nextSourceIndex, prevSourceIndex, prevLength);
+                                        doloop = getTrail(source, target, uniMask, x, flush, cr);
+                                        c = x.c;
+                                        sourceArrayIndex = x.sourceArrayIndex;
+                                        sourceIndex = x.sourceIndex;
+                                        nextSourceIndex = x.nextSourceIndex;
+                                        prevSourceIndex = x.prevSourceIndex;
+
+                                        if (x.doread) {
+                                            if (doloop)
+                                                continue;
+                                            else
+                                                break;
                                        }
+                                    } else {
+                                        /* this is an unmatched trail code unit (2nd surrogate) */
+                                        /* callback(illegal) */
+                                        cr[0] = CoderResult.malformedForLength(1);
                                        break;
-                                    case MBCS_OUTPUT_2_SISO:
-                                        /* 1/2-byte stateful with Shift-In/Shift-Out */
-                                        /*
-                                         * Save the old state in the converter object
-                                         * right here, then change the local prevLength state variable if neccessary.
-                                         * Then, if this character turns out to be unassigned or a fallback that
-                                         * is not taken, the callback code must not save the new state in the converter
-                                         * because the new state is for a character that is not output.
-                                         * However, the callback must still restore the state from the converter
-                                         * in case the callback function changed it for its output.
-                                         */
-                                        fromUnicodeStatus = prevLength; /* save the old state */
-                                        value = bytes[value + (c&0x3f)] << 8 | bytes[value + (c&0x3f) + 1];
-                                        if (value <= 0xff) {
-                                            if (value == 0) {
-                                                gotoUnassigned = true;
-                                            } else if (prevLength <= 1) {
-                                                length = 1;
-                                            } else {
-                                                /* change from double=byte mode to single-byte */
-                                                value |= (int)UConverterConstants.SI<<8;
-                                                length = 2;
-                                                prevLength = 1;
-                                            }
-                                        } else {
-                                            if (prevLength == 2) {
-                                                length = 2;
-                                            } else {
-                                                /* change from single-byte mode to double-byte */
-                                                value |= (int)UConverterConstants.SO<<16;
-                                                length = 3;
-                                                prevLength = 2;
-                                            }
-                                        }
-                                        break;
-                                    case MBCS_OUTPUT_DBCS_ONLY:
-                                        /* table with single-byte results, but only DBCS mappings used */
-                                        value = bytes[value + (c&0x3f)] << 8 | bytes[value + (c&0x3f) + 1];
-                                        if (value <= 0xff) {
-                                            /* no mapping or SBCS result, not taken for DBCS-only */
-                                            gotoUnassigned = true;
-                                        } else {
-                                            length = 2;
-                                        }
-                                        break;
-                                    case MBCS_OUTPUT_3:
-                                        pArray = bytes;
-                                        pArrayIndex = (value+(c&0x3f)) * 3;
-                                        value = (int)(((pArray[pArrayIndex]<<16) & UConverterConstants.UNSIGNED_INT_MASK) 
-                                                | ((pArray[pArrayIndex + 1]<<8) & UConverterConstants.UNSIGNED_INT_MASK) 
-                                                | pArray[pArrayIndex + 2]);
-                                        if (((long)value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
-                                            if (value == 0) {
-                                                gotoUnassigned = true;
-                                            } else {
-                                                length = 1;
-                                            }
-                                        } else if (((long)value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {
-                                            length = 2;
-                                        } else {
-                                            length = 3;
-                                        }
-                                        break;
-                                    case MBCS_OUTPUT_4:
-                                        value = bytes[value + (c&0x3f)] << 24 | bytes[value + (c&0x3f) + 1] << 16 | bytes[value + (c&0x3f) + 2] << 8 | bytes[value + (c&0x3f) + 3];
-                                        if (((long)value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
-                                            if (value == 0) {
-                                                gotoUnassigned = true;
-                                            } else {
-                                                length = 1;
-                                            }
-                                        } else if (((long)value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {
-                                            length = 2;
-                                        } else if (((long)value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffffff) {
-                                            length = 3;
-                                        } else {
-                                            length = 4;
-                                        }
-                                        break;
-                                    case MBCS_OUTPUT_3_EUC:
-                                        value = bytes[value + (c&0x3f)] << 8 | bytes[value + (c&0x3f) + 1];
-                                        /* EUC 16-bit fixed-length representation */
-                                        if (value <= 0xff) {
-                                            if (value == 0) {
-                                                gotoUnassigned = true;
-                                            } else {
-                                                length = 1;
-                                            }
-                                        } else if ((value&0x8000) == 0) {
-                                            value|=0x8e8000;
-                                            length = 3;
-                                        } else if ((value&0x80) == 0) {
-                                            value|=0x8f0080;
-                                            length = 3;
-                                        } else {
-                                            length = 2;
-                                        }
-                                        break;
-                                    case MBCS_OUTPUT_4_EUC:
-                                        pArray = bytes;
-                                        pArrayIndex = (value+(c&0x3f)) * 3;
-                                        value = (int)(((pArray[pArrayIndex]<<16) & UConverterConstants.UNSIGNED_INT_MASK) 
-                                                | ((pArray[pArrayIndex + 1]<<8) & UConverterConstants.UNSIGNED_INT_MASK) 
-                                                | pArray[pArrayIndex + 2]);
-                                        /* EUC 16-bit fixed-length representation applied to the first two bytes */
-                                        if (((long)value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
-                                            if (value == 0) {
-                                                gotoUnassigned = true;
-                                            } else {
-                                                length = 1;
-                                            }
-                                        } else if (((long)value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {
-                                            length = 2;
-                                        } else if ((value&0x800000) == 0) {
-                                            value|=0x8e800000;
-                                            length = 4;
-                                        } else if ((value&0x8000) == 0) {
-                                            value|=0x8f008000;
-                                            length = 4;
-                                        } else {
-                                            length = 3;
-                                        }
-                                        break;
-                                    default:
-                                        /* must not occur */
-                                        /*
-                                         * To avoid compiler warnings that value & length may be
-                                         * used without having been initialized, we set them here.
-                                         * In reality, this is unreachable code.
-                                         * Not having a default branch also causes warnings with
-                                         * some compilers.
-                                         */
-                                        value = length = 0;
-                                        break;
-                                    }
-                                } else {
-                                    utf8Friendly = false;
-                                    /*
-                                     * This also tests if the codepage maps single surrogates. If it does, then surrogates
-                                     * are not paired but mapped separately. Note that in this case unmatched surrogates are
-                                     * not detected.
-                                     */
-                                    if (UTF16.isSurrogate((char) c)
-                                            && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
-                                        if (UTF16.isLeadSurrogate((char) c)) {
-                                            // getTrail:
-                                            SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex,
-                                                    nextSourceIndex, prevSourceIndex, prevLength);
-                                            doloop = getTrail(source, target, uniMask, x, flush, cr);
-                                            c = x.c;
-                                            sourceArrayIndex = x.sourceArrayIndex;
-                                            sourceIndex = x.sourceIndex;
-                                            nextSourceIndex = x.nextSourceIndex;
-                                            prevSourceIndex = x.prevSourceIndex;
-    
-                                            if (x.doread) {
-                                                if (doloop)
-                                                    continue;
-                                                else
-                                                    break;
-                                            }
-                                        } else {
-                                            /* this is an unmatched trail code unit (2nd surrogate) */
-                                            /* callback(illegal) */
-                                            cr[0] = CoderResult.malformedForLength(1);
-                                            break;
-                                        }
                                    }
                                }
                            } else {
                                doread = true;
                            }
-                            if (!utf8Friendly) {
-                                /* convert the Unicode code point in c into codepage bytes */
-    
+                            /* convert the Unicode code point in c into codepage bytes */
+
+                            /*
+                             * The basic lookup is a triple-stage compact array (trie) lookup. For details see the
+                             * beginning of this file.
+                             * 
+                             * Single-byte codepages are handled with a different data structure by _MBCSSingle...
+                             * functions.
+                             * 
+                             * The result consists of a 32-bit value from stage 2 and a pointer to as many bytes as are
+                             * stored per character. The pointer points to the character's bytes in stage 3. Bits 15..0
+                             * of the stage 2 entry contain the stage 3 index for that pointer, while bits 31..16 are
+                             * flags for which of the 16 characters in the block are roundtrip-assigned.
+                             * 
+                             * For 2-byte and 4-byte codepages, the bytes are stored as uint16_t respectively as
+                             * uint32_t, in the platform encoding. For 3-byte codepages, the bytes are always stored in
+                             * big-endian order.
+                             * 
+                             * For EUC encodings that use only either 0x8e or 0x8f as the first byte of their longest
+                             * byte sequences, the first two bytes in this third stage indicate with their 7th bits
+                             * whether these bytes are to be written directly or actually need to be preceeded by one of
+                             * the two Single-Shift codes. With this, the third stage stores one byte fewer per
+                             * character than the actual maximum length of EUC byte sequences.
+                             * 
+                             * Other than that, leading zero bytes are removed and the other bytes output. A single zero
+                             * byte may be output if the "assigned" bit in stage 2 was on. The data structure does not
+                             * support zero byte output as a fallback, and also does not allow output of leading zeros.
+                             */
+                            stage2Entry = MBCS_STAGE_2_FROM_U(table, c);
+
+                            /* get the bytes and the length for the output */
+                            switch (outputType) {
+                            /* This is handled above with the method cnvMBCSDoubleFromUnicodeWithOffsets() */
+                            /* case MBCS_OUTPUT_2:
+                                value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+                                if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+                                    length = 1;
+                                } else {
+                                    length = 2;
+                                }
+                                break; */
+                            case MBCS_OUTPUT_2_SISO:
+                                /* 1/2-byte stateful with Shift-In/Shift-Out */
                                /*
-                                 * The basic lookup is a triple-stage compact array (trie) lookup. For details see the
-                                 * beginning of this file.
-                                 * 
-                                 * Single-byte codepages are handled with a different data structure by _MBCSSingle...
-                                 * functions.
-                                 * 
-                                 * The result consists of a 32-bit value from stage 2 and a pointer to as many bytes as are
-                                 * stored per character. The pointer points to the character's bytes in stage 3. Bits 15..0
-                                 * of the stage 2 entry contain the stage 3 index for that pointer, while bits 31..16 are
-                                 * flags for which of the 16 characters in the block are roundtrip-assigned.
-                                 * 
-                                 * For 2-byte and 4-byte codepages, the bytes are stored as uint16_t respectively as
-                                 * uint32_t, in the platform encoding. For 3-byte codepages, the bytes are always stored in
-                                 * big-endian order.
-                                 * 
-                                 * For EUC encodings that use only either 0x8e or 0x8f as the first byte of their longest
-                                 * byte sequences, the first two bytes in this third stage indicate with their 7th bits
-                                 * whether these bytes are to be written directly or actually need to be preceeded by one of
-                                 * the two Single-Shift codes. With this, the third stage stores one byte fewer per
-                                 * character than the actual maximum length of EUC byte sequences.
-                                 * 
-                                 * Other than that, leading zero bytes are removed and the other bytes output. A single zero
-                                 * byte may be output if the "assigned" bit in stage 2 was on. The data structure does not
-                                 * support zero byte output as a fallback, and also does not allow output of leading zeros.
+                                 * Save the old state in the converter object right here, then change the local
+                                 * prevLength state variable if necessary. Then, if this character turns out to be
+                                 * unassigned or a fallback that is not taken, the callback code must not save the new
+                                 * state in the converter because the new state is for a character that is not output.
+                                 * However, the callback must still restore the state from the converter in case the
+                                 * callback function changed it for its output.
                                 */
-                                stage2Entry = MBCS_STAGE_2_FROM_U(table, c);
-    
-                                /* get the bytes and the length for the output */
-                                switch (outputType) {
-                                case MBCS_OUTPUT_2:
-                                    value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
-                                    if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
-                                        length = 1;
-                                    } else {
-                                        length = 2;
-                                    }
-                                    break;
-                                case MBCS_OUTPUT_2_SISO:
-                                    /* 1/2-byte stateful with Shift-In/Shift-Out */
-                                    /*
-                                     * Save the old state in the converter object right here, then change the local
-                                     * prevLength state variable if necessary. Then, if this character turns out to be
-                                     * unassigned or a fallback that is not taken, the callback code must not save the new
-                                     * state in the converter because the new state is for a character that is not output.
-                                     * However, the callback must still restore the state from the converter in case the
-                                     * callback function changed it for its output.
-                                     */
-                                    fromUnicodeStatus = prevLength; /* save the old state */
-                                    value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
-                                    if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
-                                        if (value == 0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) == false) {
-                                            /* no mapping, leave value==0 */
-                                            length = 0;
-                                        } else if (prevLength <= 1) {
-                                            length = 1;
-                                        } else {
-                                            /* change from double-byte mode to single-byte */
-                                            value |= UConverterConstants.SI << 8;
-                                            length = 2;
-                                            prevLength = 1;
-                                        }
-                                    } else {
-                                        if (prevLength == 2) {
-                                            length = 2;
-                                        } else {
-                                            /* change from single-byte mode to double-byte */
-                                            value |= UConverterConstants.SO << 16;
-                                            length = 3;
-                                            prevLength = 2;
-                                        }
-                                    }
-                                    break;
-                                case MBCS_OUTPUT_DBCS_ONLY:
-                                    /* table with single-byte results, but only DBCS mappings used */
-                                    value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
-                                    if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
-                                        /* no mapping or SBCS result, not taken for DBCS-only */
-                                        value = stage2Entry = 0; /* stage2Entry=0 to reset roundtrip flags */
+                                fromUnicodeStatus = prevLength; /* save the old state */
+                                value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+                                if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+                                    if (value == 0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) == false) {
+                                        /* no mapping, leave value==0 */
                                        length = 0;
-                                    } else {
-                                        length = 2;
-                                    }
-                                    break;
-                                case MBCS_OUTPUT_3:
-                                    pArray = bytes;
-                                    pArrayIndex = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
-                                    value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)
-                                            | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
-                                            | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK);
-                                    if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+                                    } else if (prevLength <= 1) {
                                        length = 1;
-                                    } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {
+                                    } else {
+                                        /* change from double-byte mode to single-byte */
+                                        value |= UConverterConstants.SI << 8;
+                                        length = 2;
+                                        prevLength = 1;
+                                    }
+                                } else {
+                                    if (prevLength == 2) {
                                        length = 2;
                                    } else {
+                                        /* change from single-byte mode to double-byte */
+                                        value |= UConverterConstants.SO << 16;
                                        length = 3;
+                                        prevLength = 2;
                                    }
-                                    break;
-                                case MBCS_OUTPUT_4:
-                                    value = MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c);
-                                    if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
-                                        length = 1;
-                                    } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {
-                                        length = 2;
-                                    } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffffff) {
-                                        length = 3;
-                                    } else {
-                                        length = 4;
-                                    }
-                                    break;
-                                case MBCS_OUTPUT_3_EUC:
-                                    value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
-                                    /* EUC 16-bit fixed-length representation */
-                                    if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
-                                        length = 1;
-                                    } else if ((value & 0x8000) == 0) {
-                                        value |= 0x8e8000;
-                                        length = 3;
-                                    } else if ((value & 0x80) == 0) {
-                                        value |= 0x8f0080;
-                                        length = 3;
-                                    } else {
-                                        length = 2;
-                                    }
-                                    break;
-                                case MBCS_OUTPUT_4_EUC:
-                                    pArray = bytes;
-                                    pArrayIndex = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
-                                    value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)
-                                            | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
-                                            | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK);
-                                    /* EUC 16-bit fixed-length representation applied to the first two bytes */
-                                    if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
-                                        length = 1;
-                                    } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {
-                                        length = 2;
-                                    } else if ((value & 0x800000) == 0) {
-                                        value |= 0x8e800000;
-                                        length = 4;
-                                    } else if ((value & 0x8000) == 0) {
-                                        value |= 0x8f008000;
-                                        length = 4;
-                                    } else {
-                                        length = 3;
-                                    }
-                                    break;
-                                default:
-                                    /* must not occur */
-                                    /*
-                                     * To avoid compiler warnings that value & length may be used without having been
-                                     * initialized, we set them here. In reality, this is unreachable code. Not having a
-                                     * default branch also causes warnings with some compilers.
-                                     */
+                                }
+                                break;
+                            case MBCS_OUTPUT_DBCS_ONLY:
+                                /* table with single-byte results, but only DBCS mappings used */
+                                value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+                                if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+                                    /* no mapping or SBCS result, not taken for DBCS-only */
                                    value = stage2Entry = 0; /* stage2Entry=0 to reset roundtrip flags */
                                    length = 0;
-                                    break;
+                                } else {
+                                    length = 2;
                                }
+                                break;
+                            case MBCS_OUTPUT_3:
+                                pArray = bytes;
+                                pArrayIndex = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
+                                value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)
+                                        | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
+                                        | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK);
+                                if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+                                    length = 1;
+                                } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {
+                                    length = 2;
+                                } else {
+                                    length = 3;
+                                }
+                                break;
+                            case MBCS_OUTPUT_4:
+                                value = MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c);
+                                if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+                                    length = 1;
+                                } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {
+                                    length = 2;
+                                } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffffff) {
+                                    length = 3;
+                                } else {
+                                    length = 4;
+                                }
+                                break;
+                            case MBCS_OUTPUT_3_EUC:
+                                value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+                                /* EUC 16-bit fixed-length representation */
+                                if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+                                    length = 1;
+                                } else if ((value & 0x8000) == 0) {
+                                    value |= 0x8e8000;
+                                    length = 3;
+                                } else if ((value & 0x80) == 0) {
+                                    value |= 0x8f0080;
+                                    length = 3;
+                                } else {
+                                    length = 2;
+                                }
+                                break;
+                            case MBCS_OUTPUT_4_EUC:
+                                pArray = bytes;
+                                pArrayIndex = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
+                                value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)
+                                        | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
+                                        | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK);
+                                /* EUC 16-bit fixed-length representation applied to the first two bytes */
+                                if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+                                    length = 1;
+                                } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {
+                                    length = 2;
+                                } else if ((value & 0x800000) == 0) {
+                                    value |= 0x8e800000;
+                                    length = 4;
+                                } else if ((value & 0x8000) == 0) {
+                                    value |= 0x8f008000;
+                                    length = 4;
+                                } else {
+                                    length = 3;
+                                }
+                                break;
+                            default:
+                                /* must not occur */
+                                /*
+                                 * To avoid compiler warnings that value & length may be used without having been
+                                 * initialized, we set them here. In reality, this is unreachable code. Not having a
+                                 * default branch also causes warnings with some compilers.
+                                 */
+                                value = stage2Entry = 0; /* stage2Entry=0 to reset roundtrip flags */
+                                length = 0;
+                                break;
                            }
                            
                            /* is this code point assigned, or do we use fallbacks? */
@ -3430,40 +3193,31 @@ class CharsetMBCS extends CharsetICU {
                            /* write the output character bytes from value and length */
                            /* from the first if in the loop we know that targetCapacity>0 */
                            if (length <= target.remaining()) {
-                                if (offsets == null) {
-                                    switch (length) {
-                                    /* each branch falls through to the next one */
-                                    case 4:
-                                        target.put((byte) (value >>> 24));
-                                    case 3:
-                                        target.put((byte) (value >>> 16));
-                                    case 2:
-                                        target.put((byte) (value >>> 8));
-                                    case 1:
-                                        target.put((byte) value);
-                                    default:
-                                        /* will never occur */
-                                        break;
+                                switch (length) {
+                                /* each branch falls through to the next one */
+                                case 4:
+                                    target.put((byte) (value >>> 24));
+                                    if (offsets != null) {
+                                        offsets.put(sourceIndex);
                                    }
-                                } else {
-                                    switch (length) {
-                                    /* each branch falls through to the next one */
-                                    case 4:
-                                        target.put((byte) (value >>> 24));
+                                case 3:
+                                    target.put((byte) (value >>> 16));
+                                    if (offsets != null) {
                                        offsets.put(sourceIndex);
-                                    case 3:
-                                        target.put((byte) (value >>> 16));
-                                        offsets.put(sourceIndex);
-                                    case 2:
-                                        target.put((byte) (value >>> 8));
-                                        offsets.put(sourceIndex);
-                                    case 1:
-                                        target.put((byte) value);
-                                        offsets.put(sourceIndex);
-                                    default:
-                                        /* will never occur */
-                                        break;
                                    }
+                                case 2:
+                                    target.put((byte) (value >>> 8));
+                                    if (offsets != null) {
+                                        offsets.put(sourceIndex);
+                                    }
+                                case 1:
+                                    target.put((byte) value);
+                                    if (offsets != null) {
+                                        offsets.put(sourceIndex);
+                                    }
+                                default:
+                                    /* will never occur */
+                                    break;
                                }
                            } else {
                                int errorBufferArrayIndex;
@ -4501,41 +4255,33 @@ class CharsetMBCS extends CharsetICU {
                    /* write the output character bytes from value and length */
                    /* from the first if in the loop we know that targetCapacity>0 */
                    if (length <= targetCapacity) {
-                        if (offsets == null) {
-                            switch (length) {
-                                /* each branch falls through to the next one */
-                            case 4:
-                                target.put((byte)(value>>24));
-                            case 3:
-                                target.put((byte)(value>>16));
-                            case 2:
-                                target.put((byte)(value>>8));
-                            case 1:
-                                target.put((byte)value);
-                            default :
-                                /* will never occur */
-                                break;
+                        switch (length) {
+                        /* each branch falls through to the next one */
+                        case 4:
+                            target.put((byte)(value>>24));
+                            if (offsets != null) {
+                                offsets.put(x.sourceIndex);
                            }
-                        } else {
-                            switch (length) {
-                                /* each branch falls through to the next one */
-                            case 4:
-                                target.put((byte)(value>>24));
+                        case 3:
+                            target.put((byte)(value>>16));
+                            if (offsets != null) {
                                offsets.put(x.sourceIndex);
-                            case 3:
-                                target.put((byte)(value>>16));
-                                offsets.put(x.sourceIndex);
-                            case 2:
-                                target.put((byte)(value>>8));
-                                offsets.put(x.sourceIndex);
-                            case 1:
-                                target.put((byte)value);
-                                offsets.put(x.sourceIndex);
-                            default :
-                                /* will never occur */
-                                break;
                            }
+                        case 2:
+                            target.put((byte)(value>>8));
+                            if (offsets != null) {
+                                offsets.put(x.sourceIndex);
+                            }
+                        case 1:
+                            target.put((byte)value);
+                            if (offsets != null) {
+                                offsets.put(x.sourceIndex);
+                            }
+                        default :
+                            /* will never occur */
+                            break;
                        }
+                        
                        targetCapacity -= length;
                    } else {
                        /*
@ -5706,8 +5452,8 @@ class CharsetMBCS extends CharsetICU {
            if(st2>stage1Length) {
                ps2 = st2;
                for(st2=0;st2<64;++st2){
-                    
-                    if((st3=((int) stage12.get(ps2+st2))<<STAGE_2_LEFT_SHIFT)!= 0){
+                    st3=((int) stage12.get(ps2+st2))<<STAGE_2_LEFT_SHIFT;
+                    if(st3!= 0){
                        ps3 = st3;
                        do {
                            value = stage3b.get((int)(UConverterConstants.UNSIGNED_SHORT_MASK&stage3.get(ps3++)));
--- a/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java
+++ b/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java
@ -4808,6 +4808,21 @@ public class TestCharset extends TestFmwk {
        if (!result.isOverflow()) {
            errln("Overflow buffer while decoding ISO-2022-KR should have occurred.");
        }
+        
+        /* This is part of the ambiguous converter test in ICU4C and is used here to provide
+         * better code coverage.
+         */
+        byte [] bytearray2 = {
+                0x61, 0x5b, 0x5c
+        };
+        
+        bb = ByteBuffer.wrap(bytearray2);
+        cb = CharBuffer.allocate(20);
+        
+        result = decoder.decode(bb, cb, true);
+        if (!result.isMalformed()) {
+            errln("Malformed error while decoding ISO-2022-KR should have occurred.");
+        }
    }
    
    //provide better code coverage for Charset ISO-2022-JP