From 6751276083b97a0a4acce574c84c1c1f85fe943d Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Thu, 10 May 2007 20:40:35 +0000 Subject: [PATCH] ICU-5707 fix u_strFromUTF8Lenient() for 2-byte UTF-8 in last 3 bytes of input X-SVN-Rev: 21510 --- icu4c/source/common/ustrtrns.c | 4 ++-- icu4c/source/test/cintltst/custrtrn.c | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/icu4c/source/common/ustrtrns.c b/icu4c/source/common/ustrtrns.c index 1801d8aeb64..5f013c10063 100644 --- a/icu4c/source/common/ustrtrns.c +++ b/icu4c/source/common/ustrtrns.c @@ -1,7 +1,7 @@ /* ****************************************************************************** * -* Copyright (C) 2001-2006, International Business Machines +* Copyright (C) 2001-2007, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** @@ -839,7 +839,7 @@ u_strFromUTF8Lenient(UChar *dest, } else if(ch < 0xe0) { /* U+0080..U+07FF */ if(pSrc < pSrcLimit) { /* 0x3080 = (0xc0 << 6) + 0x80 */ - *pDest++ = (UChar)(((ch & 0x1f) << 6) + *pSrc++ - 0x3080); + *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080); continue; } } else if(ch < 0xf0) { /* U+0800..U+FFFF */ diff --git a/icu4c/source/test/cintltst/custrtrn.c b/icu4c/source/test/cintltst/custrtrn.c index 0335a005575..90cc00ae5c4 100644 --- a/icu4c/source/test/cintltst/custrtrn.c +++ b/icu4c/source/test/cintltst/custrtrn.c @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 2001-2006, International Business Machines Corporation and + * Copyright (c) 2001-2007, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ /******************************************************************************** @@ -643,6 +643,10 @@ Test_FromUTF8Lenient(void) { 0xf0, 0x90, 0, 0xf0, 0x90, 0x80, 0, + /* non-ASCII characters in the last few bytes */ + 0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0, + 0x61, 0xe0, 0xa0, 0x80, 0xc3, 0x9f, 0, + /* empty string */ 0, @@ -664,6 +668,9 @@ Test_FromUTF8Lenient(void) { 0xfffd, 0, 0xfffd, 0, + 0x61, 0xdf, 0x800, 0, + 0x61, 0x800, 0xdf, 0, + 0, 0