From dd2917fc9b79b7a7389a6610cfb5fb4e5499c6c1 Mon Sep 17 00:00:00 2001 From: George Rhoten Date: Fri, 5 May 2006 07:08:37 +0000 Subject: [PATCH] ICU-5195 Fix codepoint handling that straddle buffers. X-SVN-Rev: 19614 --- icu4c/source/common/ucnv_u32.c | 13 ++-- icu4c/source/test/cintltst/ncnvtst.c | 98 ++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 4 deletions(-) diff --git a/icu4c/source/common/ucnv_u32.c b/icu4c/source/common/ucnv_u32.c index 223f923fdce..034bf9bbcb9 100644 --- a/icu4c/source/common/ucnv_u32.c +++ b/icu4c/source/common/ucnv_u32.c @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2002-2005, International Business Machines +* Copyright (C) 2002-2006, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ucnv_u32.c @@ -51,9 +51,10 @@ T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args, unsigned char *toUBytes = args->converter->toUBytes; uint32_t ch, i; - /* UTF-8 returns here for only non-offset, this needs to change.*/ + /* Restore state of current sequence */ if (args->converter->toUnicodeStatus && myTarget < targetLimit) { i = args->converter->toULength; /* restore # of bytes consumed */ + args->converter->toULength = 0; ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/ args->converter->toUnicodeStatus = 0; @@ -131,8 +132,10 @@ T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, uint32_t ch, i; int32_t offsetNum = 0; + /* Restore state of current sequence */ if (args->converter->toUnicodeStatus && myTarget < targetLimit) { i = args->converter->toULength; /* restore # of bytes consumed */ + args->converter->toULength = 0; ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/ args->converter->toUnicodeStatus = 0; @@ -510,10 +513,11 
@@ T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args, unsigned char *toUBytes = args->converter->toUBytes; uint32_t ch, i; - /* UTF-8 returns here for only non-offset, this needs to change.*/ + /* Restore state of current sequence */ if (args->converter->toUnicodeStatus && myTarget < targetLimit) { i = args->converter->toULength; /* restore # of bytes consumed */ + args->converter->toULength = 0; /* Stores the previously calculated ch from a previous call*/ ch = args->converter->toUnicodeStatus - 1; @@ -596,10 +600,11 @@ T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, uint32_t ch, i; int32_t offsetNum = 0; - /* UTF-8 returns here for only non-offset, this needs to change.*/ + /* Restore state of current sequence */ if (args->converter->toUnicodeStatus && myTarget < targetLimit) { i = args->converter->toULength; /* restore # of bytes consumed */ + args->converter->toULength = 0; /* Stores the previously calculated ch from a previous call*/ ch = args->converter->toUnicodeStatus - 1; diff --git a/icu4c/source/test/cintltst/ncnvtst.c b/icu4c/source/test/cintltst/ncnvtst.c index d24378b0d89..9a3c124aebe 100644 --- a/icu4c/source/test/cintltst/ncnvtst.c +++ b/icu4c/source/test/cintltst/ncnvtst.c @@ -699,6 +699,39 @@ static void TestRegressionUTF8(){ } free(standardForm); free(utf8); + + { + static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 }; + static const UChar expected[] = { 0x0301, 0x0300 }; + UConverter *conv8; + UErrorCode err = U_ZERO_ERROR; + UChar pivotBuffer[100]; + const UChar* const pivEnd = pivotBuffer + 100; + const char* srcBeg; + const char* srcEnd; + UChar* pivBeg; + + conv8 = ucnv_open("UTF-8", &err); + + srcBeg = src8; + pivBeg = pivotBuffer; + srcEnd = src8 + 3; + ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); + if (srcBeg != srcEnd) { + log_err("Did not consume whole buffer on first call.\n"); + } + + srcEnd = src8 + 4; + ucnv_toUnicode(conv8, &pivBeg, 
pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); + if (srcBeg != srcEnd) { + log_err("Did not consume whole buffer on second call.\n"); + } + + if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { + log_err("Did not get expected results for UTF-8.\n"); + } + ucnv_close(conv8); + } } #define MAX_UTF32_LEN 1 @@ -770,6 +803,71 @@ static void TestRegressionUTF32(){ expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR)) log_err("u->UTF-32LE\n"); } + + { + static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 }; + static const UChar expected[] = { 0x0031, 0x0030 }; + UConverter *convBE; + UErrorCode err = U_ZERO_ERROR; + UChar pivotBuffer[100]; + const UChar* const pivEnd = pivotBuffer + 100; + const char* srcBeg; + const char* srcEnd; + UChar* pivBeg; + + convBE = ucnv_open("UTF-32BE", &err); + + srcBeg = srcBE; + pivBeg = pivotBuffer; + srcEnd = srcBE + 5; + ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); + if (srcBeg != srcEnd) { + log_err("Did not consume whole buffer on first call.\n"); + } + + srcEnd = srcBE + 8; + ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); + if (srcBeg != srcEnd) { + log_err("Did not consume whole buffer on second call.\n"); + } + + if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { + log_err("Did not get expected results for UTF-32BE.\n"); + } + ucnv_close(convBE); + } + { + static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 }; + static const UChar expected[] = { 0x0031, 0x0030 }; + UConverter *convLE; + UErrorCode err = U_ZERO_ERROR; + UChar pivotBuffer[100]; + const UChar* const pivEnd = pivotBuffer + 100; + const char* srcBeg; + const char* srcEnd; + UChar* pivBeg; + + convLE = ucnv_open("UTF-32LE", &err); + + srcBeg = srcLE; + pivBeg = pivotBuffer; + srcEnd = srcLE + 5; + ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, 
&err); + if (srcBeg != srcEnd) { + log_err("Did not consume whole buffer on first call.\n"); + } + + srcEnd = srcLE + 8; + ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); + if (srcBeg != srcEnd) { + log_err("Did not consume whole buffer on second call.\n"); + } + + if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { + log_err("Did not get expected results for UTF-32LE.\n"); + } + ucnv_close(convLE); + } } /*Walk through the available converters*/