ICU-2449 refactor conversion - call fromUnicode callbacks only from ucnv.c framework

X-SVN-Rev: 12667
2025-04-06 22:15:31 +00:00 · 2003-07-24 00:28:47 +00:00 · 2003-07-24 00:28:47 +00:00 · 8ab9f23f3a
commit 8ab9f23f3a
parent cab6c351eb
13 changed files with 319 additions and 741 deletions
--- a/icu4c/source/common/ucnv.c
+++ b/icu4c/source/common/ucnv.c
@ -611,7 +611,7 @@ static void _reset(UConverter *converter, UConverterResetChoice choice,
    }
    if(choice!=UCNV_RESET_TO_UNICODE) {
        converter->fromUnicodeStatus = 0;
-        converter->fromUSurrogateLead = 0;
+        converter->fromUChar32 = 0;
        converter->invalidUCharLength = converter->charErrorBufferLength = 0;
    }

@ -864,7 +864,7 @@ _fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
        converterSawEndOfInput=
            (UBool)(U_SUCCESS(*err) &&
                    pArgs->flush && pArgs->source==pArgs->sourceLimit &&
-                    cnv->fromUSurrogateLead==0);
+                    cnv->fromUChar32==0);

        /* no callback called yet for this iteration */
        calledCallback=FALSE;
@ -911,13 +911,11 @@ _fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
                     * (continue converting by breaking out of only the inner loop)
                     */
                    break;
-                } else if(pArgs->flush && cnv->fromUSurrogateLead!=0) {
+                } else if(pArgs->flush && cnv->fromUChar32!=0) {
                    /*
                     * the entire input stream is consumed
                     * and there is a partial, truncated input sequence left
                     */
-                    cnv->invalidUCharBuffer[0]=(UChar)cnv->fromUSurrogateLead;
-                    cnv->invalidUCharLength=1;

                    /* inject an error and continue with callback handling */
                    *err=U_TRUNCATED_CHAR_FOUND;
@ -970,20 +968,15 @@ _fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
            /* callback handling */
            {
                UChar32 codePoint;
-                int32_t i;

-                /* get the first code point */
-                i=0;
-                errorInputLength=cnv->invalidUCharLength;
-                if(errorInputLength>0) {
-                    U16_NEXT(cnv->invalidUCharBuffer, i, errorInputLength, codePoint);
-                } else {
-                    /* should never occur because errors should be caused by some input */
-                    codePoint=U_SENTINEL;
-                }
+                /* get and write the code point */
+                codePoint=cnv->fromUChar32;
+                errorInputLength=0;
+                U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
+                cnv->invalidUCharLength=(int8_t)errorInputLength;

                /* set the converter state to deal with the next character */
-                cnv->fromUSurrogateLead=0;
+                cnv->fromUChar32=0;

                /* call the callback function */
                cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
--- a/icu4c/source/common/ucnv2022.c
+++ b/icu4c/source/common/ucnv2022.c
@ -1279,70 +1279,6 @@ getEndOfBuffer_2022(const char** source,
    return sourceLimit;
 }

-/*
- * From Unicode Callback helper function
- */
-static void 
-fromUnicodeCallback(UConverterFromUnicodeArgs* args,const UChar32 sourceChar,const UChar** pSource,
-                    unsigned char** pTarget,int32_t** pOffsets,UConverterCallbackReason reason, UErrorCode* err){
-                
-    /*variables for callback */
-    const UChar* saveSource =NULL;
-    char* saveTarget =NULL;
-    int32_t* saveOffsets =NULL;
-    int currentOffset =0;
-    int saveIndex =0;
-    int32_t* offsets = *pOffsets;
-    const UChar* source = *pSource;
-    unsigned char* target = *pTarget;
-
-    args->converter->invalidUCharLength = 0;
-    
-    if(sourceChar>0xffff){
-        args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] =(uint16_t)(((sourceChar)>>10)+0xd7c0);
-        args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] =(uint16_t)(((sourceChar)&0x3ff)|0xdc00);
-    }
-    else{
-        args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] =(UChar)sourceChar;
-    }
-    if(offsets)
-        currentOffset = *(offsets-1)+1;
-
-    saveSource = args->source;
-    saveTarget = args->target;
-    saveOffsets = args->offsets;
-    args->target = (char*)target;
-    args->source = source;
-    args->offsets = offsets;
-
-    /*copies current values for the ErrorFunctor to update */
-    /*Calls the ErrorFunctor */
-    args->converter->fromUCharErrorBehaviour ( args->converter->fromUContext, 
-                  args, 
-                  args->converter->invalidUCharBuffer, 
-                  args->converter->invalidUCharLength, 
-                 (UChar32) (sourceChar), 
-                  reason, 
-                  err);
-
-    saveIndex = args->target - (char*)target;
-    if(args->offsets){
-        args->offsets = saveOffsets;
-        while(saveIndex-->0){
-             *offsets = currentOffset;
-              offsets++;
-        }
-    }
-    target = (unsigned char*)args->target;
-    *pTarget=target;
-    *pOffsets=offsets;
-    args->source=saveSource;
-    args->target=saveTarget;
-    args->offsets=saveOffsets;
-    args->converter->fromUSurrogateLead=0x00;
-
-}
-
 /*
 * To Unicode Callback helper function
 */
@ -1528,7 +1464,6 @@ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
    UChar32 sourceChar  =0x0000;
    const char* escSeq = NULL;
    int len =0; /*length of escSeq chars*/
-    UConverterCallbackReason reason;
    UConverterSharedData* sharedData=NULL;
    UBool useFallback; 

@ -1556,7 +1491,7 @@ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
    currentType         = &converterData->currentType;
    
    /* check if the last codepoint of previous buffer was a lead surrogate*/
-    if(args->converter->fromUSurrogateLead!=0 && target< targetLimit) {
+    if((sourceChar = args->converter->fromUChar32)!=0 && target< targetLimit) {
        goto getTrail;
    }
    
@ -1700,17 +1635,13 @@ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
                }
            }
            else{
-
                /* if we cannot find the character after checking all codepages 
                 * then this is an error
                 */
-                reason = UCNV_UNASSIGNED;
-                *err = U_INVALID_CHAR_FOUND;

                /*check if the char is a First surrogate*/
                if(UTF_IS_SURROGATE(sourceChar)) {
                    if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
-                        args->converter->fromUSurrogateLead=(UChar)sourceChar;
 getTrail:
                        /*look ahead to find the trail surrogate*/
                        if(source <  sourceLimit) {
@ -1718,36 +1649,31 @@ getTrail:
                            UChar trail=(UChar) *source;
                            if(UTF_IS_SECOND_SURROGATE(trail)) {
                                source++;
-                                sourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUSurrogateLead, trail);
-                                args->converter->fromUSurrogateLead=0x00;
-                                reason =UCNV_UNASSIGNED;
+                                sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
                                *err = U_INVALID_CHAR_FOUND;
                                /* convert this surrogate code point */
                                /* exit this condition tree */
                            } else {
                                /* this is an unmatched lead code unit (1st surrogate) */
                                /* callback(illegal) */
-                                reason=UCNV_ILLEGAL;
                                *err=U_ILLEGAL_CHAR_FOUND;
                            }
                        } else {
                            /* no more input */
                            *err = U_ZERO_ERROR;
-                            break;
                        }
                    } else {
                        /* this is an unmatched trail code unit (2nd surrogate) */
                        /* callback(illegal) */
-                        reason=UCNV_ILLEGAL;
                        *err=U_ILLEGAL_CHAR_FOUND;
                    }
+                } else {
+                    /* callback(unassigned) for a BMP code point */
+                    *err = U_INVALID_CHAR_FOUND;
                }
-                /* Call the callback function*/
-                fromUnicodeCallback(args,sourceChar,&source,&target,&offsets,reason,err);
-                initIterState = *currentState;
-                if (U_FAILURE (*err)){
-                    break;
-                }
+
+                args->converter->fromUChar32=sourceChar;
+                break;
            }
        } /* end if(myTargetIndex<myTargetLength) */
        else{
@ -2045,7 +1971,6 @@ UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
    UBool isTargetByteDBCS;
    UBool oldIsTargetByteDBCS;
    UConverterDataISO2022 *converterData;
-    UConverterCallbackReason reason;
    UConverterSharedData* sharedData;
    UBool useFallback;
    int32_t length =0;
@ -2070,7 +1995,7 @@ UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
    }
    
    isTargetByteDBCS   = (UBool) args->converter->fromUnicodeStatus;
-    if(args->converter->fromUSurrogateLead!=0 && target <targetLimit) {
+    if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {
        goto getTrail;
    }
    while(source < sourceLimit){
@ -2140,13 +2065,10 @@ UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
                /* oops.. the code point is unassingned
                 * set the error and reason
                 */
-                reason =UCNV_UNASSIGNED;
-                *err =U_INVALID_CHAR_FOUND;

                /*check if the char is a First surrogate*/
                if(UTF_IS_SURROGATE(sourceChar)) {
                    if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
-                        args->converter->fromUSurrogateLead=(UChar)sourceChar;
 getTrail:
                        /*look ahead to find the trail surrogate*/
                        if(source <  sourceLimit) {
@ -2154,38 +2076,32 @@ getTrail:
                            UChar trail=(UChar) *source;
                            if(UTF_IS_SECOND_SURROGATE(trail)) {
                                source++;
-                                sourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUSurrogateLead, trail);
-                                args->converter->fromUSurrogateLead=0x00;
+                                sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
                                *err = U_INVALID_CHAR_FOUND;
-                                reason =UCNV_UNASSIGNED;
                                /* convert this surrogate code point */
                                /* exit this condition tree */
                            } else {
                                /* this is an unmatched lead code unit (1st surrogate) */
                                /* callback(illegal) */
-                                reason=UCNV_ILLEGAL;
                                *err=U_ILLEGAL_CHAR_FOUND;
                            }
                        } else {
                            /* no more input */
                            *err = U_ZERO_ERROR;
-                            break;
                        }
                    } else {
                        /* this is an unmatched trail code unit (2nd surrogate) */
                        /* callback(illegal) */
-                        reason=UCNV_ILLEGAL;
                        *err=U_ILLEGAL_CHAR_FOUND;
                    }
+                } else {
+                    /* callback(unassigned) for a BMP code point */
+                    *err = U_INVALID_CHAR_FOUND;
                }
-                args->converter->fromUnicodeStatus = (int32_t)isTargetByteDBCS;
-                /* Call the callback function*/
-                fromUnicodeCallback(args,sourceChar,&source,&target,&offsets,reason,err);
-                isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;

-                if (U_FAILURE (*err)){
-                    break;
-                }
+                args->converter->fromUChar32=sourceChar;
+                args->converter->fromUnicodeStatus = (int32_t)isTargetByteDBCS;
+                break;
            }
        } /* end if(myTargetIndex<myTargetLength) */
        else{
@ -2542,7 +2458,6 @@ UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
    int len =0; /*length of escSeq chars*/
    uint32_t targetValue=0;
    uint8_t planeVal=0;
-    UConverterCallbackReason reason;
    UConverterSharedData* sharedData=NULL;
    UBool useFallback;

@ -2575,7 +2490,7 @@ UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
    sharedData        = (*currentConverter)->sharedData;

    /* check if the last codepoint of previous buffer was a lead surrogate*/
-    if(args->converter->fromUSurrogateLead!=0 && target< targetLimit) {
+    if((sourceChar = args->converter->fromUChar32)!=0 && target< targetLimit) {
        goto getTrail;
    }

@ -2591,7 +2506,6 @@ UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
            /*check if the char is a First surrogate*/
             if(UTF_IS_SURROGATE(sourceChar)) {
                if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
-                    args->converter->fromUSurrogateLead=(UChar)sourceChar;
 getTrail:
                    /*look ahead to find the trail surrogate*/
                    if(source < sourceLimit) {
@ -2599,28 +2513,28 @@ getTrail:
                        UChar trail=(UChar) *source;
                        if(UTF_IS_SECOND_SURROGATE(trail)) {
                            source++;
-                            /*(((args->converter->fromUSurrogateLead)<<10L)+(trail)-((0xd800<<10L)+0xdc00-0x10000))*/
-                            sourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUSurrogateLead, trail);
-                            args->converter->fromUSurrogateLead=0x00;
-                            /* convert this surrogate code point */
+                            sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
+                            args->converter->fromUChar32=0x00;
+                            /* convert this supplementary code point */
                            /* exit this condition tree */
                        } else {
                            /* this is an unmatched lead code unit (1st surrogate) */
                            /* callback(illegal) */
-                            reason=UCNV_ILLEGAL;
                            *err=U_ILLEGAL_CHAR_FOUND;
-                            goto callback;
+                            args->converter->fromUChar32=sourceChar;
+                            break;
                        }
                    } else {
                        /* no more input */
+                        args->converter->fromUChar32=sourceChar;
                        break;
                    }
                } else {
                    /* this is an unmatched trail code unit (2nd surrogate) */
                    /* callback(illegal) */
-                    reason=UCNV_ILLEGAL;
                    *err=U_ILLEGAL_CHAR_FOUND;
-                    goto callback;
+                    args->converter->fromUChar32=sourceChar;
+                    break;
                }
            }

@ -2755,20 +2669,12 @@ getTrail:

            }
            else{
-
                /* if we cannot find the character after checking all codepages 
                 * then this is an error
                 */
-                reason = UCNV_UNASSIGNED;
                *err = U_INVALID_CHAR_FOUND;
-callback:
-
-                fromUnicodeCallback(args,sourceChar,&source,&target,&offsets,reason,err);
-                initIterState = *currentState;
-               
-                if (U_FAILURE (*err)){
-                    break;
-                }
+                args->converter->fromUChar32=sourceChar;
+                break;
            }
        } /* end if(myTargetIndex<myTargetLength) */
        else{
--- a/icu4c/source/common/ucnv_bld.h
+++ b/icu4c/source/common/ucnv_bld.h
@ -148,7 +148,17 @@ struct UConverter {
    uint32_t toUnicodeStatus;           /* Used to internalize stream status information */
    int32_t mode;
    uint32_t fromUnicodeStatus;
-    UChar    fromUSurrogateLead;        /* similar to toUBytes; keeps the lead surrogate of the current character */
+
+    /*
+     * More fromUnicode() status. Serves 3 purposes:
+     * - keeps a lead surrogate between buffers (similar to toUBytes[])
+     * - keeps a lead surrogate at the end of the stream,
+     *   which the framework handles as truncated input
+     * - if the fromUnicode() implementation returns to the framework
+     *   (ucnv.c ucnv_fromUnicode()) with an error code set, then the
+     *   framework calls the callback for this code point
+     */
+    UChar32 fromUChar32;

    int8_t subCharLen;                  /* length of the codepage specific character sequence */
    int8_t invalidCharLength;
--- a/icu4c/source/common/ucnv_cnv.c
+++ b/icu4c/source/common/ucnv_cnv.c
@ -116,3 +116,43 @@ ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
    uset_addRange(set, 0, 0xd7ff);
    uset_addRange(set, 0xe000, 0x10ffff);
 }
+
+U_CFUNC void /* copy bytes[0..length) to *target; what does not fit is spilled to cnv->charErrorBuffer */
+ucnv_fromUWriteBytes(UConverter *cnv, /* may be NULL: leftover bytes are then dropped, the error is still set */
+                     const char *bytes, int32_t length, /* output bytes and their count */
+                     char **target, const char *targetLimit, /* in/out write pointer and its exclusive limit */
+                     int32_t **offsets, /* in/out; NULL, or pointing at NULL, disables offset output */
+                     int32_t sourceIndex, /* value stored in the offsets array for every byte written to target */
+                     UErrorCode *pErrorCode) { /* not read here; written only when bytes are left over */
+    char *t=*target;
+    int32_t *o;
+
+    /* write as many bytes as fit before targetLimit */
+    if(offsets==NULL || (o=*offsets)==NULL) {
+        while(length>0 && t<targetLimit) {
+            *t++=*bytes++;
+            --length;
+        }
+    } else {
+        /* output with offsets: one sourceIndex entry per byte written */
+        while(length>0 && t<targetLimit) {
+            *t++=*bytes++;
+            *o++=sourceIndex;
+            --length;
+        }
+        *offsets=o; /* hand the advanced offsets pointer back to the caller */
+    }
+    *target=t; /* hand the advanced target pointer back to the caller */
+
+    /* write overflow: length is now the number of bytes that did not fit */
+    if(length>0) {
+        if(cnv!=NULL) {
+            t=(char *)cnv->charErrorBuffer; /* NOTE(review): no capacity check against charErrorBuffer here - confirm callers bound length */
+            cnv->charErrorBufferLength=(int8_t)length; /* assigned, not added: earlier buffer content is overwritten from index 0 */
+            do {
+                *t++=(uint8_t)*bytes++;
+            } while(--length>0);
+        }
+        *pErrorCode=U_BUFFER_OVERFLOW_ERROR; /* set whether or not cnv was available to hold the leftover bytes */
+    }
+}
--- a/icu4c/source/common/ucnv_cnv.h
+++ b/icu4c/source/common/ucnv_cnv.h
@ -229,4 +229,12 @@ ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
                               UConverterUnicodeSet which,
                               UErrorCode *pErrorCode);

+U_CFUNC void /* writes bytes to *target; leftover bytes go to cnv->charErrorBuffer and set U_BUFFER_OVERFLOW_ERROR */
+ucnv_fromUWriteBytes(UConverter *cnv, /* may be NULL; leftover bytes are then not stored */
+                     const char *bytes, int32_t length,
+                     char **target, const char *targetLimit, /* *target is advanced past the bytes written */
+                     int32_t **offsets, /* may be NULL or point at NULL; otherwise *offsets is advanced */
+                     int32_t sourceIndex, /* stored once per byte written when offsets are in use */
+                     UErrorCode *pErrorCode);
+
 #endif /* UCNV_CNV */
--- a/icu4c/source/common/ucnv_u32.c
+++ b/icu4c/source/common/ucnv_u32.c
@ -311,10 +311,10 @@ T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args,

    temp[0] = 0;

-    if (args->converter->fromUSurrogateLead)
+    if (args->converter->fromUChar32)
    {
-        ch = args->converter->fromUSurrogateLead;
-        args->converter->fromUSurrogateLead = 0;
+        ch = args->converter->fromUChar32;
+        args->converter->fromUChar32 = 0;
        goto lowsurogate;
    }

@ -346,7 +346,7 @@ lowsurogate:
            else if (!args->flush)
            {
                /* ran out of source */
-                args->converter->fromUSurrogateLead = (UChar)ch;
+                args->converter->fromUChar32 = ch;
                break;
            }
 #endif
@ -396,10 +396,10 @@ T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,

    temp[0] = 0;

-    if (args->converter->fromUSurrogateLead)
+    if (args->converter->fromUChar32)
    {
-        ch = args->converter->fromUSurrogateLead;
-        args->converter->fromUSurrogateLead = 0;
+        ch = args->converter->fromUChar32;
+        args->converter->fromUChar32 = 0;
        goto lowsurogate;
    }

@ -423,7 +423,7 @@ lowsurogate:
            else if (!args->flush)
            {
                /* ran out of source */
-                args->converter->fromUSurrogateLead = (UChar)ch;
+                args->converter->fromUChar32 = ch;
                break;
            }
 #endif
@ -790,10 +790,10 @@ T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args,

    temp[3] = 0;

-    if (args->converter->fromUSurrogateLead)
+    if (args->converter->fromUChar32)
    {
-        ch = args->converter->fromUSurrogateLead;
-        args->converter->fromUSurrogateLead = 0;
+        ch = args->converter->fromUChar32;
+        args->converter->fromUChar32 = 0;
        goto lowsurogate;
    }

@ -817,7 +817,7 @@ lowsurogate:
            else if (!args->flush)
            {
                /* ran out of source */
-                args->converter->fromUSurrogateLead = (UChar)ch;
+                args->converter->fromUChar32 = ch;
                break;
            }
 #endif
@ -867,10 +867,10 @@ T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,

    temp[3] = 0;

-    if (args->converter->fromUSurrogateLead)
+    if (args->converter->fromUChar32)
    {
-        ch = args->converter->fromUSurrogateLead;
-        args->converter->fromUSurrogateLead = 0;
+        ch = args->converter->fromUChar32;
+        args->converter->fromUChar32 = 0;
        goto lowsurogate;
    }

@ -894,7 +894,7 @@ lowsurogate:
            else if (!args->flush)
            {
                /* ran out of source */
-                args->converter->fromUSurrogateLead = (UChar)ch;
+                args->converter->fromUChar32 = ch;
                break;
            }
 #endif
--- a/icu4c/source/common/ucnv_u8.c
+++ b/icu4c/source/common/ucnv_u8.c
@ -443,14 +443,14 @@ U_CFUNC void T_UConverter_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,
    const UChar *sourceLimit = args->sourceLimit;
    const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
    UBool isCESU8 = (UBool)(args->converter->sharedData == &_CESU8Data);
-    uint32_t ch, ch2;
+    UChar32 ch, ch2;
    int16_t indexToWrite;
    char temp[4];

-    if (cnv->fromUSurrogateLead && myTarget < targetLimit)
+    if (cnv->fromUChar32 && myTarget < targetLimit)
    {
-        ch = cnv->fromUSurrogateLead;
-        cnv->fromUSurrogateLead = 0;
+        ch = cnv->fromUChar32;
+        cnv->fromUChar32 = 0;
        goto lowsurrogate;
    }

@ -494,63 +494,21 @@ lowsurrogate:
                        } else {
                            /* this is an unmatched lead code unit (1st surrogate) */
                            /* callback(illegal) */
-                            ch2 = ch;
+                            cnv->fromUChar32 = ch;
+                            *err = U_ILLEGAL_CHAR_FOUND;
+                            break;
                        }
                    } else {
                        /* no more input */
-                        cnv->fromUSurrogateLead = (UChar)ch;
+                        cnv->fromUChar32 = ch;
                        break;
                    }
                } else {
                    /* this is an unmatched trail code unit (2nd surrogate) */
                    /* callback(illegal) */
-                    ch2 = ch;
-                }
-
-                if(ch2 != 0) {
-                    /* call the callback function with all the preparations and post-processing */
+                    cnv->fromUChar32 = ch;
                    *err = U_ILLEGAL_CHAR_FOUND;
-
-                    /* update the arguments structure */
-                    args->source=mySource;
-                    args->target=(char *)myTarget;
-
-                    /* write the code point as code units */
-                    cnv->invalidUCharBuffer[0] = (UChar)ch2;
-                    cnv->invalidUCharLength = 1;
-
-                    /* call the callback function */
-                    cnv->fromUCharErrorBehaviour(cnv->fromUContext, args, cnv->invalidUCharBuffer, 1, ch2, UCNV_ILLEGAL, err);
-
-                    /* get the converter state from UConverter */
-                    ch = cnv->fromUSurrogateLead;
-                    cnv->fromUSurrogateLead = 0;
-
-                    myTarget=(uint8_t *)args->target;
-                    mySource=args->source;
-
-                    /*
-                     * If the callback overflowed the target, then we need to
-                     * stop here with an overflow indication.
-                     */
-                    if(*err==U_BUFFER_OVERFLOW_ERROR) {
-                        break;
-                    } else if(U_FAILURE(*err)) {
-                        /* break on error */
-                        break;
-                    } else if(cnv->charErrorBufferLength>0) {
-                        /* target is full */
-                        *err=U_BUFFER_OVERFLOW_ERROR;
-                        break;
-                        /*
-                         * } else if(ch != 0) { ...
-                         * ### TODO 2002jul01 markus: It looks like this code (from ucnvmbcs.c)
-                         * does not handle the case where the callback leaves ch=fromUSurrogateLead!=0 .
-                         * We would have to check myTarget<targetLimit and goto lowsurrogate?!
-                         */
-                    }
-
-                    continue;
+                    break;
                }
            }

@ -602,15 +560,15 @@ U_CFUNC void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeA
    const UChar *sourceLimit = args->sourceLimit;
    const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
    UBool isCESU8 = (UBool)(args->converter->sharedData == &_CESU8Data);
-    uint32_t ch, ch2;
+    UChar32 ch, ch2;
    int32_t offsetNum, nextSourceIndex;
    int16_t indexToWrite;
    char temp[4];

-    if (cnv->fromUSurrogateLead && myTarget < targetLimit)
+    if (cnv->fromUChar32 && myTarget < targetLimit)
    {
-        ch = cnv->fromUSurrogateLead;
-        cnv->fromUSurrogateLead = 0;
+        ch = cnv->fromUChar32;
+        cnv->fromUChar32 = 0;
        offsetNum = -1;
        nextSourceIndex = 0;
        goto lowsurrogate;
@ -664,69 +622,21 @@ lowsurrogate:
                        } else {
                            /* this is an unmatched lead code unit (1st surrogate) */
                            /* callback(illegal) */
-                            ch2 = ch;
+                            cnv->fromUChar32 = ch;
+                            *err = U_ILLEGAL_CHAR_FOUND;
+                            break;
                        }
                    } else {
                        /* no more input */
-                        cnv->fromUSurrogateLead = (UChar)ch;
+                        cnv->fromUChar32 = ch;
                        break;
                    }
                } else {
                    /* this is an unmatched trail code unit (2nd surrogate) */
                    /* callback(illegal) */
-                    ch2 = ch;
-                }
-
-                if(ch2 != 0) {
-                    /* call the callback function with all the preparations and post-processing */
+                    cnv->fromUChar32 = ch;
                    *err = U_ILLEGAL_CHAR_FOUND;
-
-                    /* update the arguments structure */
-                    args->source=mySource;
-                    args->target=(char *)myTarget;
-                    args->offsets=myOffsets;
-
-                    /* write the code point as code units */
-                    cnv->invalidUCharBuffer[0] = (UChar)ch2;
-                    cnv->invalidUCharLength = 1;
-
-                    /* call the callback function */
-                    cnv->fromUCharErrorBehaviour(cnv->fromUContext, args, cnv->invalidUCharBuffer, 1, ch2, UCNV_ILLEGAL, err);
-
-                    /* get the converter state from UConverter */
-                    ch = cnv->fromUSurrogateLead;
-                    cnv->fromUSurrogateLead = 0;
-
-                    /* update target and deal with offsets if necessary */
-                    myOffsets=ucnv_updateCallbackOffsets(myOffsets, ((uint8_t *)args->target)-myTarget, offsetNum);
-                    myTarget=(uint8_t *)args->target;
-
-                    /* update the source pointer and index */
-                    offsetNum=nextSourceIndex+(args->source-mySource);
-                    mySource=args->source;
-
-                    /*
-                     * If the callback overflowed the target, then we need to
-                     * stop here with an overflow indication.
-                     */
-                    if(*err==U_BUFFER_OVERFLOW_ERROR) {
-                        break;
-                    } else if(U_FAILURE(*err)) {
-                        /* break on error */
-                        break;
-                    } else if(cnv->charErrorBufferLength>0) {
-                        /* target is full */
-                        *err=U_BUFFER_OVERFLOW_ERROR;
-                        break;
-                        /*
-                         * } else if(ch != 0) { ...
-                         * ### TODO 2002jul01 markus: It looks like this code (from ucnvmbcs.c)
-                         * does not handle the case where the callback leaves ch=fromUSurrogateLead!=0 .
-                         * We would have to check myTarget<targetLimit and goto lowsurrogate?!
-                         */
-                    }
-
-                    continue;
+                    break;
                }
            }

--- a/icu4c/source/common/ucnvbocu.c
+++ b/icu4c/source/common/ucnvbocu.c
@ -402,7 +402,7 @@ U_ALIGN_CODE(16)
    offsets=pArgs->offsets;

    /* get the converter state from UConverter */
-    c=cnv->fromUSurrogateLead;
+    c=cnv->fromUChar32;
    prev=(int32_t)cnv->fromUnicodeStatus;
    if(prev==0) {
        prev=BOCU1_ASCII_PREV;
@ -667,7 +667,7 @@ getTrail:
    }

    /* set the converter state back into UConverter */
-    cnv->fromUSurrogateLead= c<0 ? (UChar)-c : 0;
+    cnv->fromUChar32= c<0 ? -c : 0;
    cnv->fromUnicodeStatus=(uint32_t)prev;

    /* write back the updated pointers */
@ -701,7 +701,7 @@ _Bocu1FromUnicode(UConverterFromUnicodeArgs *pArgs,
    targetCapacity=pArgs->targetLimit-pArgs->target;

    /* get the converter state from UConverter */
-    c=cnv->fromUSurrogateLead;
+    c=cnv->fromUChar32;
    prev=(int32_t)cnv->fromUnicodeStatus;
    if(prev==0) {
        prev=BOCU1_ASCII_PREV;
@ -888,7 +888,7 @@ getTrail:
    }

    /* set the converter state back into UConverter */
-    cnv->fromUSurrogateLead= c<0 ? (UChar)-c : 0;
+    cnv->fromUChar32= c<0 ? -c : 0;
    cnv->fromUnicodeStatus=(uint32_t)prev;

    /* write back the updated pointers */
--- a/icu4c/source/common/ucnvhz.c
+++ b/icu4c/source/common/ucnvhz.c
@ -69,7 +69,7 @@ _HZOpen(UConverter *cnv, const char *name,const char *locale,uint32_t options, U
    cnv->toUnicodeStatus = 0;
    cnv->fromUnicodeStatus= 0;
    cnv->mode=0;
-    cnv->fromUSurrogateLead=0x0000;
+    cnv->fromUChar32=0x0000;
    cnv->extraInfo = uprv_malloc (sizeof (UConverterDataHZ));
    if(cnv->extraInfo != NULL){
        ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = ucnv_open("ibm-1386",errorCode);
@ -108,7 +108,7 @@ _HZReset(UConverter *cnv, UConverterResetChoice choice){
    }
    if(choice!=UCNV_RESET_TO_UNICODE) {
        cnv->fromUnicodeStatus= 0;
-        cnv->fromUSurrogateLead=0x0000; 
+        cnv->fromUChar32=0x0000; 
        if(cnv->extraInfo != NULL){
            ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE;
            ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0;
@ -347,7 +347,6 @@ UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
    UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo;
    UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS;
    UBool oldIsTargetUCharDBCS = isTargetUCharDBCS;
-    UConverterCallbackReason reason;
    UBool isEscapeAppended =FALSE;
    int len =0;
    const char* escSeq=NULL;
@ -356,7 +355,7 @@ UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
-    if(args->converter->fromUSurrogateLead!=0 && myTargetIndex < targetLength) {
+    if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) {
        goto getTrail;
    }
    /*writing the char to the output stream */
@ -440,16 +439,12 @@ UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,

            }
            else{
-                /* oops.. the code point is unassingned
-                 * set the error and reason
-                 */
-                reason =UCNV_UNASSIGNED;
-                *err =U_INVALID_CHAR_FOUND;
+                /* oops.. the code point is unassigned */
                /*Handle surrogates */
                /*check if the char is a First surrogate*/
                if(UTF_IS_SURROGATE(mySourceChar)) {
                    if(UTF_IS_SURROGATE_FIRST(mySourceChar)) {
-                        args->converter->fromUSurrogateLead=(UChar)mySourceChar;
+                        args->converter->fromUChar32=mySourceChar;
 getTrail:
                        /*look ahead to find the trail surrogate*/
                        if(mySourceIndex <  mySourceLength) {
@ -457,87 +452,32 @@ getTrail:
                            UChar trail=(UChar) args->source[mySourceIndex];
                            if(UTF_IS_SECOND_SURROGATE(trail)) {
                                ++mySourceIndex;
-                                mySourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUSurrogateLead, trail);
-                                args->converter->fromUSurrogateLead=0x00;
+                                mySourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUChar32, trail);
+                                args->converter->fromUChar32=0x00;
                                /* there are no surrogates in GB2312*/
                                *err = U_INVALID_CHAR_FOUND;
-                                reason=UCNV_UNASSIGNED;
                                /* exit this condition tree */
                            } else {
                                /* this is an unmatched lead code unit (1st surrogate) */
                                /* callback(illegal) */
-                                reason=UCNV_ILLEGAL;
                                *err=U_ILLEGAL_CHAR_FOUND;
                            }
                        } else {
                            /* no more input */
                            *err = U_ZERO_ERROR;
-                            break;
                        }
                    } else {
                        /* this is an unmatched trail code unit (2nd surrogate) */
                        /* callback(illegal) */
-                        reason=UCNV_ILLEGAL;
                        *err=U_ILLEGAL_CHAR_FOUND;
                    }
+                } else {
+                    /* callback(unassigned) for a BMP code point */
+                    *err = U_INVALID_CHAR_FOUND;
                }

-                {
-                    int32_t saveIndex=0;
-                    int32_t currentOffset = (args->offsets) ? *(offsets-1)+1:0;
-                    char * saveTarget = args->target;
-                    const UChar* saveSource = args->source;
-                    int32_t *saveOffsets = args->offsets;
-
-                    args->converter->invalidUCharLength = 0;
-
-                    if(mySourceChar>0xffff){
-                        args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] =(uint16_t)(((mySourceChar)>>10)+0xd7c0);
-                        args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] =(uint16_t)(((mySourceChar)&0x3ff)|0xdc00);
-                    }
-                    else{
-                        args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] =(UChar)mySourceChar;
-                    }
-                
-                    myConverterData->isTargetUCharDBCS = (UBool)isTargetUCharDBCS;
-                    args->target += myTargetIndex;
-                    args->source += mySourceIndex;
-                    args->offsets = args->offsets?offsets:0;
-                    
-
-                    saveIndex = myTargetIndex; 
-                    /*copies current values for the ErrorFunctor to update */ 
-                    /*Calls the ErrorFunctor */ 
-                    args->converter->fromUCharErrorBehaviour ( args->converter->fromUContext, 
-                                  args, 
-                                  args->converter->invalidUCharBuffer, 
-                                  args->converter->invalidUCharLength, 
-                                 (UChar32) (mySourceChar), 
-                                  reason, 
-                                  err);
-                    /*Update the local Indexes so that the conversion 
-                    *can restart at the right points 
-                    */ 
-                    myTargetIndex = (int32_t)(args->target - (char*)myTarget);
-                    mySourceIndex = (int32_t)(args->source - mySource);
-                    args->offsets = saveOffsets; 
-                    saveIndex = myTargetIndex - saveIndex;
-                    if(args->offsets){
-                        args->offsets = saveOffsets; 
-                        while(saveIndex-->0){
-                             *offsets = currentOffset;
-                              offsets++;
-                        }
-                    }
-                    isTargetUCharDBCS=myConverterData->isTargetUCharDBCS;
-                    args->source = saveSource;
-                    args->target = saveTarget;
-                    args->offsets = saveOffsets;
-                    args->converter->fromUSurrogateLead=0x00;
-                    if (U_FAILURE (*err))
-                        break;
-
-                }
+                args->converter->fromUChar32=mySourceChar;
+                break;
            }
        }
        else{
--- a/icu4c/source/common/ucnvisci.c
+++ b/icu4c/source/common/ucnvisci.c
@ -116,7 +116,7 @@ typedef struct{
    MaskEnum currentMaskFromUnicode; /* mask for current state in toUnicode */
    MaskEnum currentMaskToUnicode;   /* mask for current state in toUnicode */
    MaskEnum defMaskToUnicode;       /* mask for default state in toUnicode */
-    UBool isFirstBuffer;
+    UBool isFirstBuffer;             /* boolean for fromUnicode to see if we need to announce the first script */
    char name[30];
 }UConverterDataISCII; 

@ -197,13 +197,12 @@ _ISCIIReset(UConverter *cnv, UConverterResetChoice choice){
        data->contextCharToUnicode=NO_CHAR_MARKER;
    }
    if(choice!=UCNV_RESET_TO_UNICODE) {
-        cnv->fromUSurrogateLead=0x0000; 
+        cnv->fromUChar32=0x0000; 
        data->contextCharFromUnicode=0x00;
        data->currentMaskFromUnicode=data->defDeltaToUnicode;
        data->currentDeltaFromUnicode=data->defDeltaToUnicode;
+        data->isFirstBuffer=TRUE;
    }
-    data->isFirstBuffer=TRUE;
-
 }

 /** 
@ -811,7 +810,6 @@ UConverter_fromUnicode_ISCII_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
    int32_t* offsets = args->offsets;
    uint32_t targetByteUnit = 0x0000;
    UChar32 sourceChar = 0x0000;
-    UConverterCallbackReason reason;
    UBool useFallback;
    UConverterDataISCII *converterData;
    uint16_t newDelta=0;
@ -828,7 +826,7 @@ UConverter_fromUnicode_ISCII_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
    newDelta=converterData->currentDeltaFromUnicode;
    range = (uint16_t)(newDelta/DELTA);
    
-    if(args->converter->fromUSurrogateLead!=0 && target <targetLimit) {
+    if((sourceChar = args->converter->fromUChar32)!=0) {
        goto getTrail;
    }

@ -946,16 +944,10 @@ UConverter_fromUnicode_ISCII_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
             }
        }
        else{
-            /* oops.. the code point is unassingned
-             * set the error and reason
-             */
-            reason =UCNV_UNASSIGNED;
-            *err =U_INVALID_CHAR_FOUND;
-
+            /* oops.. the code point is unassigned */
            /*check if the char is a First surrogate*/
            if(UTF_IS_SURROGATE(sourceChar)) {
                if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
-                    args->converter->fromUSurrogateLead=(UChar)sourceChar;
 getTrail:
                    /*look ahead to find the trail surrogate*/
                    if(source <  sourceLimit) {
@ -963,94 +955,32 @@ getTrail:
                        UChar trail= (*source);
                        if(UTF_IS_SECOND_SURROGATE(trail)) {
                            source++;
-                            sourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUSurrogateLead, trail);
-                            args->converter->fromUSurrogateLead=0x00;
-                            reason =UCNV_UNASSIGNED;
+                            sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
                            *err =U_INVALID_CHAR_FOUND;
                            /* convert this surrogate code point */
                            /* exit this condition tree */
                        } else {
                            /* this is an unmatched lead code unit (1st surrogate) */
                            /* callback(illegal) */
-                            sourceChar =  args->converter->fromUSurrogateLead;
-                            reason=UCNV_ILLEGAL;
                            *err=U_ILLEGAL_CHAR_FOUND;
                        }
                    } else {
                        /* no more input */
                        *err = U_ZERO_ERROR;
-                        break;
                    }
                } else {
                    /* this is an unmatched trail code unit (2nd surrogate) */
                    /* callback(illegal) */
-                    reason=UCNV_ILLEGAL;
                    *err=U_ILLEGAL_CHAR_FOUND;
                }
+            } else {
+                /* callback(unassigned) for a BMP code point */
+                *err = U_INVALID_CHAR_FOUND;
            }
-            {
-                /*variables for callback */
-                const UChar* saveSource =NULL;
-                char* saveTarget =NULL;
-                int32_t* saveOffsets =NULL;
-                int currentOffset =0;
-                int32_t saveIndex =0;

-                args->converter->invalidUCharLength = 0;
-
-                if(sourceChar>0xffff){
-                    /* we have got a surrogate pair... dissable and populate the invalidUCharBuffer */
-                    args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] 
-                        =(uint16_t)(((sourceChar)>>10)+0xd7c0);
-                    args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] 
-                        =(uint16_t)(((sourceChar)&0x3ff)|0xdc00);
-                }
-                else{
-                    args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] 
-                        =(UChar)sourceChar;
-                }
-                
-                if(offsets){
-                    currentOffset = *(offsets-1)+1;
-                }
-                saveSource = args->source;
-                saveTarget = args->target;
-                saveOffsets = args->offsets;
-                args->target = (char*)target;
-                args->source = source;
-                args->offsets = offsets;
-
-                /*copies current values for the ErrorFunctor to update */
-                /*Calls the ErrorFunctor */
-                args->converter->fromUCharErrorBehaviour ( args->converter->fromUContext, 
-                              args, 
-                              args->converter->invalidUCharBuffer, 
-                              args->converter->invalidUCharLength, 
-                             (UChar32) (sourceChar), 
-                              reason, 
-                              err);
-
-                saveIndex = (int32_t)(args->target - (char*)target);
-                if(args->offsets){
-                    args->offsets = saveOffsets;
-                    while(saveIndex-->0){
-                         *offsets = currentOffset;
-                          offsets++;
-                    }
-                }
-                target = (unsigned char*)args->target;
-                args->source=saveSource;
-                args->target=saveTarget;
-                args->offsets=saveOffsets;
-                args->converter->fromUSurrogateLead=0x00;
-
-                if (U_FAILURE (*err)){
-                    break;
-                }
-            }
+            args->converter->fromUChar32=sourceChar;
+            break;
        }
-
-
    }/* end while(mySourceIndex<mySourceLength) */

    /*save the state and return */
--- a/icu4c/source/common/ucnvlat1.c
+++ b/icu4c/source/common/ucnvlat1.c
@ -171,7 +171,7 @@ _Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
    }

    /* get the converter state from UConverter */
-    cp=cnv->fromUSurrogateLead;
+    cp=cnv->fromUChar32;

    /* sourceIndex=-1 if the current character began in the previous buffer */
    sourceIndex= cp==0 ? 0 : -1;
@ -299,7 +299,7 @@ getTrail:
                }
            } else {
                /* no more input */
-                cnv->fromUSurrogateLead=(UChar)cp;
+                cnv->fromUChar32=cp;
                break;
            }
        } else {
@ -308,14 +308,7 @@ getTrail:
        }

        *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
-
-        /* write the code point as code units */
-        {
-            int32_t i=0;
-            U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, i, cp);
-            cnv->invalidUCharLength=(int8_t)i;
-        }
-
+        cnv->fromUChar32=cp;
        break;
    }

--- a/icu4c/source/common/ucnvmbcs.c
+++ b/icu4c/source/common/ucnvmbcs.c
@ -315,11 +315,14 @@ static void
 _MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs,
                              UErrorCode *pErrorCode);

-static void
-fromUCallback(UConverter *cnv,
-              const void *context, UConverterFromUnicodeArgs *pArgs,
-              UChar32 codePoint,
-              UConverterCallbackReason reason, UErrorCode *pErrorCode);
+static UChar32
+_extFromU(UConverter *cnv, const UConverterSharedData *sharedData,
+          UChar32 cp,
+          const UChar **source, const UChar *sourceLimit,
+          char **target, const char *targetLimit,
+          int32_t **offsets, int32_t sourceIndex,
+          UBool useFallback, UBool flush,
+          UErrorCode *pErrorCode);

 static void
 toUCallback(UConverter *cnv,
@ -819,7 +822,7 @@ _MBCSOpen(UConverter *cnv,
    cnv->toULength=0;           /* byteIndex */

    /* fromUnicode */
-    cnv->fromUSurrogateLead=0;
+    cnv->fromUChar32=0;
    cnv->fromUnicodeStatus=1;   /* prevLength */
 #endif
 }
@ -2141,7 +2144,6 @@ _MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,

    int32_t prevSourceIndex, sourceIndex, nextSourceIndex;

-    UConverterCallbackReason reason;
    uint32_t stage2Entry;
    uint32_t value;
    int32_t length, prevLength;
@ -2178,7 +2180,7 @@ _MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
    }

    /* get the converter state from UConverter */
-    c=cnv->fromUSurrogateLead;
+    c=cnv->fromUChar32;
    prevLength=cnv->fromUnicodeStatus;

    /* sourceIndex=-1 if the current character began in the previous buffer */
@ -2246,9 +2248,8 @@ getTrail:
                        } else {
                            /* this is an unmatched lead code unit (1st surrogate) */
                            /* callback(illegal) */
-                            reason=UCNV_ILLEGAL;
                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-                            goto callback;
+                            break;
                        }
                    } else {
                        /* no more input */
@ -2257,9 +2258,8 @@ getTrail:
                } else {
                    /* this is an unmatched trail code unit (2nd surrogate) */
                    /* callback(illegal) */
-                    reason=UCNV_ILLEGAL;
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-                    goto callback;
+                    break;
                }
            }

@ -2422,8 +2422,32 @@ getTrail:
                 * There is no way with this data structure for fallback output
                 * for other than U+0000 to be a zero byte.
                 */
-                /* callback(unassigned) */
-                goto unassigned;
+
+unassigned:
+                /* try an extension mapping */
+                pArgs->source=source;
+                c=_extFromU(cnv, cnv->sharedData,
+                            c, &source, sourceLimit,
+                            (char **)&target, (char *)target+targetCapacity,
+                            &offsets, sourceIndex,
+                            (UBool)UCNV_FROM_U_USE_FALLBACK(cnv, c), pArgs->flush,
+                            pErrorCode);
+                nextSourceIndex+=(int32_t)(source-pArgs->source);
+                prevLength=cnv->fromUnicodeStatus; /* restore SISO state */
+
+                if(U_FAILURE(*pErrorCode)) {
+                    /* not mappable or buffer overflow */
+                    break;
+                } else {
+                    /* a mapping was written to the target, continue */
+
+                    /* normal end of conversion: prepare for a new character */
+                    if(offsets!=NULL) {
+                        prevSourceIndex=sourceIndex;
+                        sourceIndex=nextSourceIndex;
+                    }
+                    continue;
+                }
            }

            /* write the output character bytes from value and length */
@ -2529,69 +2553,6 @@ getTrail:
                sourceIndex=nextSourceIndex;
            }
            continue;
-
-            /*
-             * This is the same ugly trick as in ToUnicode(), for the
-             * same reasons...
-             */
-unassigned:
-            reason=UCNV_UNASSIGNED;
-            *pErrorCode=U_INVALID_CHAR_FOUND;
-callback:
-            /* call the callback function with all the preparations and post-processing */
-            /* update the arguments structure */
-            pArgs->source=source;
-            pArgs->target=(char *)target;
-            pArgs->offsets=offsets;
-
-            /* set the converter state in UConverter to deal with the next character */
-            cnv->fromUSurrogateLead=0;
-            /*
-             * Do not save the prevLength SISO state because prevLength is set for
-             * the character that is now not output because it is unassigned or it is
-             * a fallback that is not taken.
-             * The above branch for MBCS_OUTPUT_2_SISO has saved the previous state already.
-             * See comments there.
-             */
-            prevSourceIndex=sourceIndex;
-
-            /* call the callback function */
-            fromUCallback(cnv, cnv->fromUContext, pArgs, c, reason, pErrorCode);
-
-            /* get the converter state from UConverter */
-            c=cnv->fromUSurrogateLead;
-            prevLength=cnv->fromUnicodeStatus;
-
-            /* update target and deal with offsets if necessary */
-            offsets=ucnv_updateCallbackOffsets(offsets, ((uint8_t *)pArgs->target)-target, sourceIndex);
-            target=(uint8_t *)pArgs->target;
-
-            /* update the source pointer and index */
-            sourceIndex=nextSourceIndex+(pArgs->source-source);
-            source=pArgs->source;
-            targetCapacity=(uint8_t *)pArgs->targetLimit-target;
-
-            /*
-             * If the callback overflowed the target, then we need to
-             * stop here with an overflow indication.
-             */
-            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
-                break;
-            } else if(U_FAILURE(*pErrorCode)) {
-                /* break on error */
-                c=0;
-                break;
-            } else if(cnv->charErrorBufferLength>0) {
-                /* target is full */
-                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-                break;
-            }
-
-            /*
-             * We do not need to repeat the statements from the normal
-             * end of the conversion because we already updated all the
-             * necessary variables.
-             */
        } else {
            /* target is full */
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
@ -2630,7 +2591,7 @@ callback:
    }

    /* set the converter state back into UConverter */
-    cnv->fromUSurrogateLead=(UChar)c;
+    cnv->fromUChar32=c;
    cnv->fromUnicodeStatus=prevLength;

    /* write back the updated pointers */
@ -2656,7 +2617,6 @@ _MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,

    int32_t sourceIndex, nextSourceIndex;

-    UConverterCallbackReason reason;
    uint32_t stage2Entry;
    uint32_t value;
    int32_t length, prevLength;
@ -2681,7 +2641,7 @@ _MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
    }

    /* get the converter state from UConverter */
-    c=cnv->fromUSurrogateLead;
+    c=cnv->fromUChar32;
    prevLength=cnv->fromUnicodeStatus;

    /* sourceIndex=-1 if the current character began in the previous buffer */
@ -2735,9 +2695,8 @@ getTrail:
                        } else {
                            /* this is an unmatched lead code unit (1st surrogate) */
                            /* callback(illegal) */
-                            reason=UCNV_ILLEGAL;
                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-                            goto callback;
+                            break;
                        }
                    } else {
                        /* no more input */
@ -2746,9 +2705,8 @@ getTrail:
                } else {
                    /* this is an unmatched trail code unit (2nd surrogate) */
                    /* callback(illegal) */
-                    reason=UCNV_ILLEGAL;
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-                    goto callback;
+                    break;
                }
            }

@ -2774,8 +2732,28 @@ getTrail:
                 * There is no way with this data structure for fallback output
                 * for other than U+0000 to be a zero byte.
                 */
-                /* callback(unassigned) */
-                goto unassigned;
+
+unassigned:
+                /* try an extension mapping */
+                pArgs->source=source;
+                c=_extFromU(cnv, cnv->sharedData,
+                            c, &source, sourceLimit,
+                            (char **)&target, (char *)target+targetCapacity,
+                            &offsets, sourceIndex,
+                            (UBool)UCNV_FROM_U_USE_FALLBACK(cnv, c), pArgs->flush,
+                            pErrorCode);
+                nextSourceIndex+=(int32_t)(source-pArgs->source);
+
+                if(U_FAILURE(*pErrorCode)) {
+                    /* not mappable or buffer overflow */
+                    break;
+                } else {
+                    /* a mapping was written to the target, continue */
+
+                    /* normal end of conversion: prepare for a new character */
+                    sourceIndex=nextSourceIndex;
+                    continue;
+                }
            }

            /* write the output character bytes from value and length */
@ -2815,62 +2793,6 @@ getTrail:
            c=0;
            sourceIndex=nextSourceIndex;
            continue;
-
-            /*
-             * This is the same ugly trick as in ToUnicode(), for the
-             * same reasons...
-             */
-unassigned:
-            reason=UCNV_UNASSIGNED;
-            *pErrorCode=U_INVALID_CHAR_FOUND;
-callback:
-            /* call the callback function with all the preparations and post-processing */
-            /* update the arguments structure */
-            pArgs->source=source;
-            pArgs->target=(char *)target;
-            pArgs->offsets=offsets;
-
-            /* set the converter state in UConverter to deal with the next character */
-            cnv->fromUSurrogateLead=0;
-            cnv->fromUnicodeStatus=prevLength;
-
-            /* call the callback function */
-            fromUCallback(cnv, cnv->fromUContext, pArgs, c, reason, pErrorCode);
-
-            /* get the converter state from UConverter */
-            c=cnv->fromUSurrogateLead;
-            prevLength=cnv->fromUnicodeStatus;
-
-            /* update target and deal with offsets if necessary */
-            offsets=ucnv_updateCallbackOffsets(offsets, ((uint8_t *)pArgs->target)-target, sourceIndex);
-            target=(uint8_t *)pArgs->target;
-
-            /* update the source pointer and index */
-            sourceIndex=nextSourceIndex+(pArgs->source-source);
-            source=pArgs->source;
-            targetCapacity=(uint8_t *)pArgs->targetLimit-target;
-
-            /*
-             * If the callback overflowed the target, then we need to
-             * stop here with an overflow indication.
-             */
-            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
-                break;
-            } else if(U_FAILURE(*pErrorCode)) {
-                /* break on error */
-                c=0;
-                break;
-            } else if(cnv->charErrorBufferLength>0) {
-                /* target is full */
-                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-                break;
-            }
-
-            /*
-             * We do not need to repeat the statements from the normal
-             * end of the conversion because we already updated all the
-             * necessary variables.
-             */
        } else {
            /* target is full */
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
@ -2879,7 +2801,7 @@ callback:
    }

    /* set the converter state back into UConverter */
-    cnv->fromUSurrogateLead=(UChar)c;
+    cnv->fromUChar32=c;
    cnv->fromUnicodeStatus=prevLength;

    /* write back the updated pointers */
@ -2905,7 +2827,6 @@ _MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,

    int32_t sourceIndex, nextSourceIndex;

-    UConverterCallbackReason reason;
    uint16_t value, minValue;
    UBool hasSupplementary;

@ -2934,7 +2855,7 @@ _MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
    hasSupplementary=(UBool)(cnv->sharedData->table->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);

    /* get the converter state from UConverter */
-    c=cnv->fromUSurrogateLead;
+    c=cnv->fromUChar32;

    /* sourceIndex=-1 if the current character began in the previous buffer */
    sourceIndex= c==0 ? 0 : -1;
@ -2982,9 +2903,8 @@ getTrail:
                        } else {
                            /* this is an unmatched lead code unit (1st surrogate) */
                            /* callback(illegal) */
-                            reason=UCNV_ILLEGAL;
                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-                            goto callback;
+                            break;
                        }
                    } else {
                        /* no more input */
@ -2993,9 +2913,8 @@ getTrail:
                } else {
                    /* this is an unmatched trail code unit (2nd surrogate) */
                    /* callback(illegal) */
-                    reason=UCNV_ILLEGAL;
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-                    goto callback;
+                    break;
                }
            }

@ -3016,65 +2935,28 @@ getTrail:
                /* normal end of conversion: prepare for a new character */
                c=0;
                sourceIndex=nextSourceIndex;
-                continue;
            } else { /* unassigned */
-                /*
-                 * We allow a 0 byte output if the Unicode code point is
-                 * U+0000 and also if the "assigned" bit is set for this entry.
-                 * There is no way with this data structure for fallback output
-                 * for other than U+0000 to be a zero byte.
-                 */
-                /* callback(unassigned) */
-            }
 unassigned:
-            reason=UCNV_UNASSIGNED;
-            *pErrorCode=U_INVALID_CHAR_FOUND;
-callback:
-            /* call the callback function with all the preparations and post-processing */
-            /* update the arguments structure */
-            pArgs->source=source;
-            pArgs->target=(char *)target;
-            pArgs->offsets=offsets;
+                /* try an extension mapping */
+                pArgs->source=source;
+                c=_extFromU(cnv, cnv->sharedData,
+                            c, &source, sourceLimit,
+                            (char **)&target, (char *)target+targetCapacity,
+                            &offsets, sourceIndex,
+                            (UBool)UCNV_FROM_U_USE_FALLBACK(cnv, c), pArgs->flush,
+                            pErrorCode);
+                nextSourceIndex+=(int32_t)(source-pArgs->source);

-            /* set the converter state in UConverter to deal with the next character */
-            cnv->fromUSurrogateLead=0;
+                if(U_FAILURE(*pErrorCode)) {
+                    /* not mappable or buffer overflow */
+                    break;
+                } else {
+                    /* a mapping was written to the target, continue */

-            /* call the callback function */
-            fromUCallback(cnv, cnv->fromUContext, pArgs, c, reason, pErrorCode);
-
-            /* get the converter state from UConverter */
-            c=cnv->fromUSurrogateLead;
-
-            /* update target and deal with offsets if necessary */
-            offsets=ucnv_updateCallbackOffsets(offsets, ((uint8_t *)pArgs->target)-target, sourceIndex);
-            target=(uint8_t *)pArgs->target;
-
-            /* update the source pointer and index */
-            sourceIndex=nextSourceIndex+(pArgs->source-source);
-            source=pArgs->source;
-            targetCapacity=(uint8_t *)pArgs->targetLimit-target;
-
-            /*
-             * If the callback overflowed the target, then we need to
-             * stop here with an overflow indication.
-             */
-            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
-                break;
-            } else if(U_FAILURE(*pErrorCode)) {
-                /* break on error */
-                c=0;
-                break;
-            } else if(cnv->charErrorBufferLength>0) {
-                /* target is full */
-                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-                break;
+                    /* normal end of conversion: prepare for a new character */
+                    sourceIndex=nextSourceIndex;
+                }
            }
-
-            /*
-             * We do not need to repeat the statements from the normal
-             * end of the conversion because we already updated all the
-             * necessary variables.
-             */
        } else {
            /* target is full */
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
@ -3083,7 +2965,7 @@ callback:
    }

    /* set the converter state back into UConverter */
-    cnv->fromUSurrogateLead=(UChar)c;
+    cnv->fromUChar32=c;

    /* write back the updated pointers */
    pArgs->source=source;
@ -3113,7 +2995,6 @@ _MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs,

    int32_t sourceIndex;

-    UConverterCallbackReason reason;
    uint16_t value, minValue;

    /* set up the local pointers */
@ -3140,7 +3021,7 @@ _MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs,
    }

    /* get the converter state from UConverter */
-    c=cnv->fromUSurrogateLead;
+    c=cnv->fromUChar32;

    /* sourceIndex=-1 if the current character began in the previous buffer */
    sourceIndex= c==0 ? 0 : -1;
@ -3237,15 +3118,6 @@ unrolled:
            continue;
        } else if(!UTF_IS_SURROGATE(c)) {
            /* normal, unassigned BMP character */
-            /*
-             * We allow a 0 byte output if the Unicode code point is
-             * U+0000 and also if the "assigned" bit is set for this entry.
-             * There is no way with this data structure for fallback output
-             * for other than U+0000 to be a zero byte.
-             */
-            /* callback(unassigned) */
-            reason=UCNV_UNASSIGNED;
-            *pErrorCode=U_INVALID_CHAR_FOUND;
        } else if(UTF_IS_SURROGATE_FIRST(c)) {
 getTrail:
            if(source<sourceLimit) {
@ -3256,13 +3128,11 @@ getTrail:
                    c=UTF16_GET_PAIR_VALUE(c, trail);
                    /* this codepage does not map supplementary code points */
                    /* callback(unassigned) */
-                    reason=UCNV_UNASSIGNED;
-                    *pErrorCode=U_INVALID_CHAR_FOUND;
                } else {
                    /* this is an unmatched lead code unit (1st surrogate) */
                    /* callback(illegal) */
-                    reason=UCNV_ILLEGAL;
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    break;
                }
            } else {
                /* no more input */
@ -3271,69 +3141,45 @@ getTrail:
        } else {
            /* this is an unmatched trail code unit (2nd surrogate) */
            /* callback(illegal) */
-            reason=UCNV_ILLEGAL;
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+            break;
        }

-        /* call the callback function with all the preparations and post-processing */
-        /* get the number of code units for c to correctly advance sourceIndex after the callback call */
-        length=UTF_CHAR_LENGTH(c);
+        /* c does not have a mapping */

-        /* set offsets since the start or the last callback */
+        /* get the number of code units for c to correctly advance sourceIndex */
+        length=U16_LENGTH(c);
+
+        /* set the offsets for everything converted since the start or since the last extension mapping */
        if(offsets!=NULL) {
            int32_t count=(int32_t)(source-lastSource);

-            /* do not set the offset for the callback-causing character */
+            /* do not set the offset for this character */
            count-=length;

            while(count>0) {
                *offsets++=sourceIndex++;
                --count;
            }
-            /* offset and sourceIndex are now set for the current character */
+            /* offsets and sourceIndex are now set for the current character */
        }

-        /* update the arguments structure */
-        pArgs->source=source;
-        pArgs->target=(char *)target;
-        pArgs->offsets=offsets;
+        /* try an extension mapping */
+        lastSource=source;
+        c=_extFromU(cnv, cnv->sharedData,
+                    c, &source, sourceLimit,
+                    (char **)&target, (char *)target+targetCapacity,
+                    &offsets, sourceIndex,
+                    (UBool)UCNV_FROM_U_USE_FALLBACK(cnv, c), pArgs->flush,
+                    pErrorCode);
+        sourceIndex+=length+(int32_t)(source-lastSource);
+        lastSource=source;

-        /* set the converter state in UConverter to deal with the next character */
-        cnv->fromUSurrogateLead=0;
-
-        /* call the callback function */
-        fromUCallback(cnv, cnv->fromUContext, pArgs, c, reason, pErrorCode);
-
-        /* get the converter state from UConverter */
-        c=cnv->fromUSurrogateLead;
-
-        /* update target and deal with offsets if necessary */
-        offsets=ucnv_updateCallbackOffsets(offsets, ((uint8_t *)pArgs->target)-target, sourceIndex);
-        target=(uint8_t *)pArgs->target;
-
-        /* update the source pointer and index */
-        sourceIndex+=length+(pArgs->source-source);
-        source=lastSource=pArgs->source;
-        targetCapacity=(uint8_t *)pArgs->targetLimit-target;
-        length=sourceLimit-source;
-        if(length<targetCapacity) {
-            targetCapacity=length;
-        }
-
-        /*
-         * If the callback overflowed the target, then we need to
-         * stop here with an overflow indication.
-         */
-        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
-            break;
-        } else if(U_FAILURE(*pErrorCode)) {
-            /* break on error */
-            c=0;
-            break;
-        } else if(cnv->charErrorBufferLength>0) {
-            /* target is full */
-            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+        if(U_FAILURE(*pErrorCode)) {
+            /* not mappable or buffer overflow */
            break;
+        } else {
+            /* a mapping was written to the target, continue */
        }

 #if MBCS_UNROLL_SINGLE_FROM_BMP
@ -3357,7 +3203,7 @@ getTrail:
    }

    /* set the converter state back into UConverter */
-    cnv->fromUSurrogateLead=(UChar)c;
+    cnv->fromUChar32=c;

    /* write back the updated pointers */
    pArgs->source=source;
@ -3672,35 +3518,53 @@ const UConverterSharedData _MBCSData={
    0
 };

-/* GB 18030 special handling ------------------------------------------------ */
+/* conversion extensions for input not in the main table -------------------- */

-/* definition of LINEAR macros and gb18030Ranges see near the beginning of the file */
+/*
+ * Hardcoded extension handling for GB 18030.
+ * For the definitions of the LINEAR macros and gb18030Ranges, see near the beginning of the file.
+ *
+ * In the future, conversion extensions may handle m:n mappings and delta tables,
+ * see http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/conversion/conversion_extensions.html
+ *
+ * If an input character cannot be mapped, then these functions set an error
+ * code. The framework will then call the callback function.
+ */

-/* the callback functions handle GB 18030 specially */
-static void
-fromUCallback(UConverter *cnv,
-              const void *context, UConverterFromUnicodeArgs *pArgs,
-              UChar32 codePoint,
-              UConverterCallbackReason reason, UErrorCode *pErrorCode) {
-    int32_t i;
+/*
+ * TODO when implementing real extensions, review whether the useFallback parameter
+ * should get cnv->useFallback or the full resolution considering cp as well
+ */

-    if((cnv->options&_MBCS_OPTION_GB18030)!=0 && reason==UCNV_UNASSIGNED) {
+/*
+ * @return cp (for cnv->fromUChar32) if there is no mapping; *pErrorCode is then U_INVALID_CHAR_FOUND
+ *         else 0 after the bytes were passed to ucnv_fromUWriteBytes(), which may itself set *pErrorCode
+ */
+static UChar32
+_extFromU(UConverter *cnv, const UConverterSharedData *sharedData,
+          UChar32 cp,
+          const UChar **source, const UChar *sourceLimit,
+          char **target, const char *targetLimit,
+          int32_t **offsets, int32_t sourceIndex,
+          UBool useFallback, UBool flush,
+          UErrorCode *pErrorCode) {
+    /* GB 18030 */
+    if(cnv!=NULL && (cnv->options&_MBCS_OPTION_GB18030)!=0) {
        const uint32_t *range;
+        int32_t i;

        range=gb18030Ranges[0];
        for(i=0; i<sizeof(gb18030Ranges)/sizeof(gb18030Ranges[0]); range+=4, ++i) {
-            if(range[0]<=(uint32_t)codePoint && (uint32_t)codePoint<=range[1]) {
+            if(range[0]<=(uint32_t)cp && (uint32_t)cp<=range[1]) {
+                /* found the Unicode code point, output the four-byte sequence for it */
                uint32_t linear;
                char bytes[4];

-                /* found the Unicode code point, output the four-byte sequence for it */
-                *pErrorCode=U_ZERO_ERROR;
-
                /* get the linear value of the first GB 18030 code in this range */
                linear=range[2]-LINEAR_18030_BASE;

                /* add the offset from the beginning of the range */
-                linear+=((uint32_t)codePoint-range[0]);
+                linear+=((uint32_t)cp-range[0]);

                /* turn this into a four-byte sequence */
                bytes[3]=(char)(0x30+linear%10); linear/=10;
@ -3709,21 +3573,21 @@ fromUCallback(UConverter *cnv,
                bytes[0]=(char)(0x81+linear);

                /* output this sequence */
-                ucnv_cbFromUWriteBytes(pArgs, bytes, 4, 0, pErrorCode);
-                return;
+                ucnv_fromUWriteBytes(cnv,
+                                     bytes, 4, target, targetLimit,
+                                     offsets, sourceIndex, pErrorCode);
+                return 0;
            }
        }
    }

-    /* write the code point as code units */
-    i=0;
-    UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, codePoint);
-    cnv->invalidUCharLength=(int8_t)i;
-
-    /* call the normal callback function */
-    cnv->fromUCharErrorBehaviour(context, pArgs, cnv->invalidUCharBuffer, i, codePoint, reason, pErrorCode);
+    /* no mapping */
+    *pErrorCode=U_INVALID_CHAR_FOUND;
+    return cp;
 }

+/* GB 18030 special handling ------------------------------------------------ */
+
 static void
 toUCallback(UConverter *cnv,
            const void *context, UConverterToUnicodeArgs *pArgs,
--- a/icu4c/source/common/ucnvscsu.c
+++ b/icu4c/source/common/ucnvscsu.c
@ -181,7 +181,7 @@ _SCSUReset(UConverter *cnv, UConverterResetChoice choice) {
            break;
        }

-        cnv->fromUSurrogateLead=0;
+        cnv->fromUChar32=0;
    }
 }

@ -216,8 +216,6 @@ _SCSUClose(UConverter *cnv) {

 /* SCSU-to-Unicode conversion functions ------------------------------------- */

-/* ### TODO check operator precedence | << + < */
-
 static void
 _SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                          UErrorCode *pErrorCode) {
@ -1059,7 +1057,7 @@ _SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
    dynamicWindow=scsu->fromUDynamicWindow;
    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];

-    c=cnv->fromUSurrogateLead;
+    c=cnv->fromUChar32;

    /* sourceIndex=-1 if the current character began in the previous buffer */
    sourceIndex= c==0 ? 0 : -1;
@ -1386,18 +1384,11 @@ getTrailUnicode:
    }
 endloop:

-    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
-        /* c is an unpaired surrogate */
-        cnv->invalidUCharBuffer[0]=(UChar)c;
-        cnv->invalidUCharLength=1;
-        c=0;
-    }
-
    /* set the converter state back into UConverter */
    scsu->fromUIsSingleByteMode=isSingleByteMode;
    scsu->fromUDynamicWindow=dynamicWindow;

-    cnv->fromUSurrogateLead=(UChar)c;
+    cnv->fromUChar32=c;

    /* write back the updated pointers */
    pArgs->source=source;
@ -1553,7 +1544,7 @@ _SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
    dynamicWindow=scsu->fromUDynamicWindow;
    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];

-    c=cnv->fromUSurrogateLead;
+    c=cnv->fromUChar32;

    /* similar conversion "loop" as in toUnicode */
 loop:
@ -1851,18 +1842,11 @@ getTrailUnicode:
    }
 endloop:

-    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
-        /* c is an unpaired surrogate */
-        cnv->invalidUCharBuffer[0]=(UChar)c;
-        cnv->invalidUCharLength=1;
-        c=0;
-    }
-
    /* set the converter state back into UConverter */
    scsu->fromUIsSingleByteMode=isSingleByteMode;
    scsu->fromUDynamicWindow=dynamicWindow;

-    cnv->fromUSurrogateLead=(UChar)c;
+    cnv->fromUChar32=c;

    /* write back the updated pointers */
    pArgs->source=source;