ICU-3346 support simple (single-character) conversion extensions

X-SVN-Rev: 13656
2025-04-08 06:53:45 +00:00 · 2003-11-11 18:42:09 +00:00 · 2003-11-11 18:42:09 +00:00 · 2e6b59aa7d
commit 2e6b59aa7d
parent 506bc1495f
3 changed files with 20 additions and 39 deletions
--- a/icu4c/source/common/ucnv2022.c
+++ b/icu4c/source/common/ucnv2022.c
@@ -1595,14 +1595,11 @@ UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                    continue;
                }
                else{
-                    const char *pBuf;
-
                    tempBuf[0] = (char) args->converter->toUnicodeStatus;
                    tempBuf[1] = (char) mySourceChar;
                    mySourceChar+= (args->converter->toUnicodeStatus)<<8;
                    *toUnicodeStatus= 0;
-                    pBuf = tempBuf;
-                    targetUniChar = _MBCSSimpleGetNextUChar(myData->currentConverter->sharedData, &pBuf, tempBuf+2, args->converter->useFallback);
+                    targetUniChar = _MBCSSimpleGetNextUChar(myData->currentConverter->sharedData, tempBuf, 2, args->converter->useFallback);
                }
                break;

@@ -1917,8 +1914,7 @@ UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args
 static void 
 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                                                            UErrorCode* err){
-    char tempBuf[3];
-    const char* pBuf;
+    char tempBuf[2];
    const char *mySource = ( char *) args->source;
    UChar *myTarget = args->target;
    const char *mySourceLimit = args->sourceLimit;
@@ -1995,19 +1991,17 @@ UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                    tempBuf[1] = (char) (mySourceChar+0x80);
                    mySourceChar = (UChar)(mySourceChar + (args->converter->toUnicodeStatus<<8));
                    args->converter->toUnicodeStatus =0x00;
-                    pBuf = tempBuf;
-                    targetUniChar = _MBCSSimpleGetNextUChar(sharedData,
-                        &pBuf,(pBuf+2),useFallback);
+                    targetUniChar = _MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
                }
            }
            else{
                if(args->converter->fromUnicodeStatus == 0x00){
-                    targetUniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(sharedData, mySourceChar);
+                    targetUniChar = _MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);

                }

            }
-            if(targetUniChar != missingCharMarker){
+            if(targetUniChar < 0xfffe){
                if(args->offsets)
                    args->offsets[myTarget - args->target]= mySource - args->source - 1-(myData->currentType==DBCS);
                *(myTarget++)=(UChar)targetUniChar;
@@ -2432,10 +2426,8 @@ UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                                               UErrorCode* err){
    char tempBuf[3];
    int plane=0;
-    const char* pBuf;
    const char *mySource = ( char *) args->source;
    UChar *myTarget = args->target;
-    char *tempLimit = &tempBuf[3];
    const char *mySourceLimit = args->sourceLimit;
    uint32_t targetUniChar = 0x0000;
    uint32_t mySourceChar = 0x0000;
@@ -2494,22 +2486,22 @@ UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                            continue;
                        }
                        else{
+                            int32_t tempBufLen;
                            if(plane >0){
                                tempBuf[0] = (char) (0x80+plane);
                                tempBuf[1] = (char) (args->converter->toUnicodeStatus);
                                tempBuf[2] = (char) (mySourceChar);
-                                tempLimit  = &tempBuf[2]+1;
+                                tempBufLen = 3;

                            }else{
                                tempBuf[0] = (char) args->converter->toUnicodeStatus;
                                tempBuf[1] = (char) mySourceChar;
-                                tempLimit  = &tempBuf[2];
+                                tempBufLen = 2;
                            }
                            mySourceChar+= (uint32_t) args->converter->toUnicodeStatus<<8;
                            args->converter->toUnicodeStatus = 0;
-                            pBuf = tempBuf;
                            if(myData->currentConverter!=NULL){
-                                targetUniChar = _MBCSSimpleGetNextUChar(myData->currentConverter->sharedData, &pBuf, tempLimit, FALSE);
+                                targetUniChar = _MBCSSimpleGetNextUChar(myData->currentConverter->sharedData, tempBuf, tempBufLen, FALSE);
                            }else{
                                *err=U_INVALID_CHAR_FOUND;
                                break;
--- a/icu4c/source/common/ucnv_lmb.c
+++ b/icu4c/source/common/ucnv_lmb.c
@@ -1037,12 +1037,12 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs*   args,
                if (*args->source == group) {
                    /* single byte */
                    ++args->source;
-                    uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, &args->source, args->source + 1, FALSE);
+                    uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, args->source, 1, FALSE);
+                    ++args->source;
                } else {
                    /* double byte */
-                    const char *newLimit = args->source + 2;
-                    uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, &args->source, newLimit, FALSE);
-                    args->source = newLimit; /* set the correct limit even in case of an error */
+                    uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, args->source, 2, FALSE);
+                    args->source += 2;
                }
            }
            else {                                  /* single byte conversion */
@@ -1058,7 +1058,6 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs*   args,
                    /* The non-optimizable oddballs where there is an explicit byte 
                    * AND the second byte is not in the upper ascii range
                    */
-                    const char *s;
                    char bytes[2];

                    extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
@@ -1067,8 +1066,7 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs*   args,
                    /* Lookup value must include opt group */
                    bytes[0] = group;
                    bytes[1] = CurByte;
-                    s = bytes;
-                    uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, &s, bytes + 2, FALSE);
+                    uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, bytes, 2, FALSE);
                }
            }
        }
@@ -1084,16 +1082,14 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs*   args,
                    CHECK_SOURCE_LIMIT(0);

                    /* let the MBCS conversion consume CurByte again */
-                    --args->source;
-                    uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, &args->source, args->source + 1, FALSE);
+                    uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, args->source - 1, 1, FALSE);
                }
                else
                {
                    CHECK_SOURCE_LIMIT(1);
                    /* let the MBCS conversion consume CurByte again */
-                    --args->source;
-                    /* since we know that we start at a lead byte, args->source _will_ be incremented by 2 */
-                    uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, &args->source, args->source + 2, FALSE);
+                    uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, args->source - 1, 2, FALSE);
+                    ++args->source;
                }
            }
            else                                   /* single byte conversion */
--- a/icu4c/source/common/ucnvhz.c
+++ b/icu4c/source/common/ucnvhz.c
@@ -142,11 +142,9 @@ _HZReset(UConverter *cnv, UConverterResetChoice choice){
 static void 
 UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                                                            UErrorCode* err){
-    char tempBuf[3];
-    const char* pBuf;
+    char tempBuf[2];
    const char *mySource = ( char *) args->source;
    UChar *myTarget = args->target;
-    char *tempLimit = &tempBuf[3]; 
    const char *mySourceLimit = args->sourceLimit;
    UChar32 targetUniChar = 0x0000;
    UChar mySourceChar = 0x0000;
@@ -234,19 +232,14 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                    tempBuf[1] = (char) (mySourceChar+0x80);
                    mySourceChar= (UChar)(((args->converter->toUnicodeStatus+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
                    args->converter->toUnicodeStatus =0x00;
-                    pBuf = &tempBuf[0];
-                    tempLimit = &tempBuf[2]+1;
                    targetUniChar = _MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
-                        &pBuf,tempLimit,args->converter->useFallback);
+                        tempBuf, 2, args->converter->useFallback);
                }
            }
            else{
                if(args->converter->fromUnicodeStatus == 0x00){
-                    tempBuf[0] = (char) mySourceChar;
-                    pBuf = &tempBuf[0];
-                    tempLimit = &tempBuf[1];
                    targetUniChar = _MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
-                        &pBuf,tempLimit,args->converter->useFallback);
+                        mySource - 1, 1, args->converter->useFallback);
                }
                else{
                    goto SAVE_STATE;