ICU-5707 fix u_strFromUTF8Lenient() for 2-byte UTF-8 in last 3 bytes of input

X-SVN-Rev: 21510
This commit is contained in:
Markus Scherer 2007-05-10 20:40:35 +00:00
parent c08fb2bcf0
commit 6751276083
2 changed files with 10 additions and 3 deletions

View file

@@ -1,7 +1,7 @@
/*
******************************************************************************
*
-* Copyright (C) 2001-2006, International Business Machines
+* Copyright (C) 2001-2007, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@@ -839,7 +839,7 @@ u_strFromUTF8Lenient(UChar *dest,
} else if(ch < 0xe0) { /* U+0080..U+07FF */
if(pSrc < pSrcLimit) {
/* 0x3080 = (0xc0 << 6) + 0x80 */
-*pDest++ = (UChar)(((ch & 0x1f) << 6) + *pSrc++ - 0x3080);
+*pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080);
continue;
}
} else if(ch < 0xf0) { /* U+0800..U+FFFF */

View file

@@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
-* Copyright (c) 2001-2006, International Business Machines Corporation and
+* Copyright (c) 2001-2007, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/********************************************************************************
@@ -643,6 +643,10 @@ Test_FromUTF8Lenient(void) {
0xf0, 0x90, 0,
0xf0, 0x90, 0x80, 0,
/* non-ASCII characters in the last few bytes */
0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0,
0x61, 0xe0, 0xa0, 0x80, 0xc3, 0x9f, 0,
/* empty string */
0,
@@ -664,6 +668,9 @@ Test_FromUTF8Lenient(void) {
0xfffd, 0,
0xfffd, 0,
0x61, 0xdf, 0x800, 0,
0x61, 0x800, 0xdf, 0,
0,
0