diff --git a/icu4c/source/extra/ustdio/unicode/ustdio.h b/icu4c/source/extra/ustdio/unicode/ustdio.h index 648663faabb..4bbeb28eb63 100644 --- a/icu4c/source/extra/ustdio/unicode/ustdio.h +++ b/icu4c/source/extra/ustdio/unicode/ustdio.h @@ -76,14 +76,11 @@ * u_fsettransliterator does not support U_READ side of transliteration. * The format specifier should limit the size of a format or honor it in order to prevent buffer overruns. (e.g. %256.256d). - * u_fgets is different from stdio. The UChar and UFile arguments are swapped. * u_fread and u_fwrite don't exist. They're needed for reading and writing data structures without any conversion. * u_file_read and u_file_write are used for writing strings. u_fgets and u_fputs or u_fread and u_fwrite should be used to do this. - * u_fgetcx isn't really needed anymore because the transliterator is a - part of the file API. It allows multiple kinds of escape sequences - to be unescaped. + * u_fgetcx may not be needed anymore. Maybe u_fgetc should return a UChar32. * We should consider using a UnicodeSet for scanset. * scanset has a buffer overflow and underflow bug for both string and file APIs. @@ -210,8 +207,8 @@ u_fgetlocale(UFILE *file); * @draft */ U_CAPI int32_t U_EXPORT2 -u_fsetlocale(const char *locale, - UFILE *file); +u_fsetlocale(const char *locale, + UFILE *file); #endif @@ -240,8 +237,8 @@ u_fgetcodepage(UFILE *file); * @draft */ U_CAPI int32_t U_EXPORT2 -u_fsetcodepage(const char *codepage, - UFILE *file); +u_fsetcodepage(const char *codepage, + UFILE *file); /** @@ -263,9 +260,9 @@ U_CAPI UConverter* U_EXPORT2 u_fgetConverter(UFILE *f); * @draft */ U_CAPI int32_t U_EXPORT2 -u_fprintf( UFILE *f, - const char *patternSpecification, - ... ); +u_fprintf(UFILE *f, + const char *patternSpecification, + ... ); /** * Write formatted data to a UFILE. @@ -280,9 +277,9 @@ u_fprintf( UFILE *f, * @draft */ U_CAPI int32_t U_EXPORT2 -u_vfprintf( UFILE *f, - const char *patternSpecification, - va_list ap); +u_vfprintf(UFILE *f, + const char *patternSpecification, + va_list ap); /** * Write formatted data to a UFILE. @@ -293,9 +290,9 @@ u_vfprintf( UFILE *f, * @draft */ U_CAPI int32_t U_EXPORT2 -u_fprintf_u( UFILE *f, - const UChar *patternSpecification, - ... ); +u_fprintf_u(UFILE *f, + const UChar *patternSpecification, + ... ); /** * Write formatted data to a UFILE. @@ -310,9 +307,9 @@ u_fprintf_u( UFILE *f, * @draft */ U_CAPI int32_t U_EXPORT2 -u_vfprintf_u( UFILE *f, - const UChar *patternSpecification, - va_list ap); +u_vfprintf_u(UFILE *f, + const UChar *patternSpecification, + va_list ap); /** * Write a Unicode to a UFILE. The null (U+0000) terminated UChar* @@ -324,8 +321,8 @@ u_vfprintf_u( UFILE *f, * @draft */ U_CAPI int32_t U_EXPORT2 -u_fputs(const UChar *s, - UFILE *f); +u_fputs(const UChar *s, + UFILE *f); /** * Write a UChar to a UFILE. @@ -335,8 +332,8 @@ u_fputs(const UChar *s, * @draft */ U_CAPI int32_t U_EXPORT2 -u_fputc(UChar uc, - UFILE *f); +u_fputc(UChar uc, + UFILE *f); /** * Write Unicode to a UFILE. @@ -349,9 +346,9 @@ u_fputc(UChar uc, * @draft */ U_CAPI int32_t U_EXPORT2 -u_file_write(const UChar *chars, - int32_t count, - UFILE *f); +u_file_write(const UChar *chars, + int32_t count, + UFILE *f); /* Input functions */ @@ -366,9 +363,9 @@ u_file_write(const UChar *chars, * @draft */ U_CAPI int32_t U_EXPORT2 -u_fscanf( UFILE *f, - const char *patternSpecification, - ... ); +u_fscanf(UFILE *f, + const char *patternSpecification, + ... ); /** * Read formatted data from a UFILE. @@ -384,9 +381,9 @@ u_fscanf( UFILE *f, * @draft */ U_CAPI int32_t U_EXPORT2 -u_vfscanf( UFILE *f, - const char *patternSpecification, - va_list ap); +u_vfscanf(UFILE *f, + const char *patternSpecification, + va_list ap); /** * Read formatted data from a UFILE. @@ -398,9 +395,9 @@ u_vfscanf( UFILE *f, * @draft */ U_CAPI int32_t U_EXPORT2 -u_fscanf_u( UFILE *f, - const UChar *patternSpecification, - ... ); +u_fscanf_u(UFILE *f, + const UChar *patternSpecification, + ... ); /** * Read formatted data from a UFILE. @@ -416,9 +413,9 @@ u_fscanf_u( UFILE *f, * @draft */ U_CAPI int32_t U_EXPORT2 -u_vfscanf_u( UFILE *f, - const UChar *patternSpecification, - va_list ap); +u_vfscanf_u(UFILE *f, + const UChar *patternSpecification, + va_list ap); /** * Read one line of text into a UChar* string from a UFILE. The newline @@ -433,9 +430,9 @@ u_vfscanf_u( UFILE *f, * @draft */ U_CAPI UChar* U_EXPORT2 -u_fgets(UFILE *f, - int32_t n, - UChar *s); +u_fgets(UChar *s, + int32_t n, + UFILE *f); /** * Read a UChar from a UFILE. @@ -444,24 +441,20 @@ u_fgets(UFILE *f, * @draft */ U_CAPI UChar U_EXPORT2 -u_fgetc(UFILE *f); +u_fgetc(UFILE *f); /** - * Read a UChar from a UFILE and process escape sequences. If the - * next character is not a backslash, this is the same as calling - * u_fgetc(). If it is, then additional characters comprising the - * escape sequence will be read from the UFILE, parsed, and the - * resultant UChar returned. Ill-formed escape sequences return - * U+FFFFFFFF. + * Read a UChar32 from a UFILE. + * * @param f The UFILE from which to read. - * @return The UChar value read, or U+FFFF if no character was - * available, or U+FFFFFFFF if an ill-formed escape sequence was + * @return The UChar32 value read, or U_EOF if no character was + * available, or U+FFFFFFFF if an ill-formed character was * encountered. * @see u_unescape() * @draft */ U_CAPI UChar32 U_EXPORT2 -u_fgetcx(UFILE *f); +u_fgetcx(UFILE *f); /** * Unget a UChar from a UFILE. @@ -469,11 +462,11 @@ u_fgetcx(UFILE *f); * to u_fgetc, the results are undefined. * @param c The UChar to put back on the stream. * @param f The UFILE to receive c. - * @return The UChar value put back if successful, U+FFFF otherwise. + * @return The UChar32 value put back if successful, U_EOF otherwise. * @draft */ -U_CAPI UChar U_EXPORT2 -u_fungetc(UChar c, +U_CAPI UChar32 U_EXPORT2 +u_fungetc(UChar32 c, UFILE *f); /** @@ -788,4 +781,3 @@ u_vsscanf_u(const UChar *buffer, #endif - diff --git a/icu4c/source/extra/ustdio/ustdio.c b/icu4c/source/extra/ustdio/ustdio.c index 80fae979f6c..267b5b55a95 100644 --- a/icu4c/source/extra/ustdio/ustdio.c +++ b/icu4c/source/extra/ustdio/ustdio.c @@ -435,9 +435,9 @@ ufile_fill_uchar_buffer(UFILE *f) } U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fgets(UFILE *f, - int32_t n, - UChar *s) +u_fgets(UChar *s, + int32_t n, + UFILE *f) { int32_t dataSize; int32_t count; @@ -536,78 +536,70 @@ u_fgetc(UFILE *f) /* otherwise, fill the buffer and return the next character */ else { ufile_fill_uchar_buffer(f); - if(f->fUCPos < f->fUCLimit) + if(f->fUCPos < f->fUCLimit) { return *(f->fUCPos)++; - else - return 0xFFFF; + } + else { + return U_EOF; + } } } -/* u_unescapeAt() callback to return a UChar from a UFILE */ -static UChar U_CALLCONV -_charAt(int32_t offset, void *context) { - return ((UFILE*) context)->fUCPos[offset]; -} - /* Read a UChar from a UFILE and process escape sequences */ U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ u_fgetcx(UFILE *f) { - int32_t length; - int32_t offset; UChar32 c32; - UChar c16; /* Fill the buffer if it is empty */ - if (f->fUCPos >= f->fUCLimit) { + if (f->fUCPos + 1 >= f->fUCLimit) { ufile_fill_uchar_buffer(f); } /* Get the next character in the buffer */ if (f->fUCPos < f->fUCLimit) { - c16 = *(f->fUCPos)++; - } else { - c16 = U_EOF; + c32 = *(f->fUCPos)++; + } + else { + c32 = U_EOF; } - /* If it isn't a backslash, return it */ - if (c16 != 0x005C /*'\\'*/) { - return c16; + if (U_IS_LEAD(c32)) { + if (f->fUCPos < f->fUCLimit) { + UChar c16 = *(f->fUCPos)++; + c32 = U16_GET_SUPPLEMENTARY(c32, c16); + } + else { + c32 = U_EOF; + } } - /* Determine the amount of data in the buffer */ - length = (int32_t)(f->fUCLimit - f->fUCPos); - - /* The longest escape sequence is \Uhhhhhhhh; make sure - we have at least that many characters */ - if (length < 10) { - /* fill the buffer */ - ufile_fill_uchar_buffer(f); - length = (int32_t)(f->fUCLimit - f->fUCPos); - } - - /* Process the escape */ - offset = 0; - c32 = u_unescapeAt(_charAt, &offset, length, (void*)f); - - /* Update the current buffer position */ - f->fUCPos += offset; - return c32; } -U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fungetc(UChar c, +U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fungetc(UChar32 ch, UFILE *f) { /* if we're at the beginning of the buffer, sorry! */ - if(f->fUCPos == f->fUCBuffer) - return 0xFFFF; - /* otherwise, put the character back */ - else { - *--(f->fUCPos) = c; - return c; + if (f->fUCPos == f->fUCBuffer + || (U_IS_LEAD(ch) && (f->fUCPos - 1) == f->fUCBuffer)) + { + ch = U_EOF; } + else { + /* otherwise, put the character back */ + /* TODO: Maybe we shouldn't be writing to the buffer and just verify the contents */ + if (U_IS_LEAD(ch)) { + /* Remember, put them back on in the reverse order. */ + *--(f->fUCPos) = U16_TRAIL(ch); + *--(f->fUCPos) = U16_LEAD(ch); + } + else { + *--(f->fUCPos) = (UChar)ch; + } + } + return ch; } U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ diff --git a/icu4c/source/test/iotest/iotest.cpp b/icu4c/source/test/iotest/iotest.cpp index f79c5bb00f2..26bab5c222b 100644 --- a/icu4c/source/test/iotest/iotest.cpp +++ b/icu4c/source/test/iotest/iotest.cpp @@ -218,13 +218,13 @@ static void TestFileFromICU(UFILE *myFile) { log_err("%%V Got: %f, Expected: %f\n", *newDoubleValuePtr, myFloat); } - u_fgets(myFile, 4, myUString); + u_fgets(myUString, 4, myFile); u_austrncpy(myString, myUString, sizeof(myUString)/sizeof(*myUString)); if (myString == NULL || strcmp(myString, "\t\n") != 0) { log_err("u_fgets got \"%s\"\n", myString); } - if (u_fgets(myFile, sizeof(myUString)/sizeof(*myUString), myUString) != myUString) { + if (u_fgets(myUString, sizeof(myUString)/sizeof(*myUString), myFile) != myUString) { log_err("u_fgets did not return myUString\n"); } u_austrncpy(myString, myUString, sizeof(myUString)/sizeof(*myUString)); @@ -232,7 +232,7 @@ static void TestFileFromICU(UFILE *myFile) { log_err("u_fgets got \"%s\"\n", myString); } - if (u_fgets(myFile, sizeof(myUString)/sizeof(*myUString), myUString) != myUString) { + if (u_fgets(myUString, sizeof(myUString)/sizeof(*myUString), myFile) != myUString) { log_err("u_fgets did not return myUString\n"); } u_austrncpy(myString, myUString, sizeof(myUString)/sizeof(*myUString)); @@ -240,7 +240,7 @@ static void TestFileFromICU(UFILE *myFile) { log_err("u_fgets got \"%s\"\n", myString); } - if (u_fgets(myFile, sizeof(myUString)/sizeof(*myUString), myUString) != myUString) { + if (u_fgets(myUString, sizeof(myUString)/sizeof(*myUString), myFile) != myUString) { log_err("u_fgets did not return myUString\n"); } u_austrncpy(myString, myUString, sizeof(myUString)/sizeof(*myUString)); @@ -248,11 +248,11 @@ static void TestFileFromICU(UFILE *myFile) { log_err("u_fgets got \"%s\"\n", myString); } - if (u_fgets(myFile, 0, myUString) != NULL) { + if (u_fgets(myUString, 0, myFile) != NULL) { log_err("u_fgets got \"%s\" and it should have returned NULL\n", myString); } - if (u_fgets(myFile, 1, myUString) != myUString) { + if (u_fgets(myUString, 1, myFile) != myUString) { log_err("u_fgets did not return myUString\n"); } u_austrncpy(myString, myUString, sizeof(myUString)/sizeof(*myUString)); @@ -260,7 +260,7 @@ static void TestFileFromICU(UFILE *myFile) { log_err("u_fgets got \"%s\"\n", myString); } - if (u_fgets(myFile, 2, myUString) != myUString) { + if (u_fgets(myUString, 2, myFile) != myUString) { log_err("u_fgets did not return myUString\n"); } u_austrncpy(myString, myUString, sizeof(myUString)/sizeof(*myUString)); @@ -377,7 +377,7 @@ static void TestfgetsBuffers() { if (u_fgetc(myFile) != 0xFF41) { log_err("The second character is wrong\n"); } - if (u_fgets(myFile, sizeof(buffer)/sizeof(buffer[0]), buffer) != buffer) { + if (u_fgets(buffer, sizeof(buffer)/sizeof(buffer[0]), myFile) != buffer) { log_err("Didn't get the buffer back\n"); return; } @@ -423,7 +423,7 @@ static void TestfgetsBuffers() { if (u_fgetc(myFile) != 0xFF41) { log_err("The second character is wrong\n"); } - if (u_fgets(myFile, sizeof(buffer)/sizeof(buffer[0]), buffer) != buffer) { + if (u_fgets(buffer, sizeof(buffer)/sizeof(buffer[0]), myFile) != buffer) { log_err("Didn't get the buffer back\n"); return; } @@ -456,7 +456,7 @@ static void TestfgetsBuffers() { u_memset(buffer, 0xDEAD, sizeof(buffer)/sizeof(buffer[0])); myFile = u_fopen(STANDARD_TEST_FILE, "r", NULL, "UTF-8"); - if (u_fgets(myFile, 2, buffer) != buffer) { + if (u_fgets(buffer, 2, myFile) != buffer) { log_err("Didn't get the buffer back\n"); return; } @@ -502,7 +502,7 @@ static void TestfgetsLineCount() { for (;;) { u_memset(buffer, 0xDEAD, sizeof(buffer)/sizeof(buffer[0])); char *returnedCharBuffer = fgets(charBuffer, sizeof(charBuffer)/sizeof(charBuffer[0]), stdFile); - UChar *returnedUCharBuffer = u_fgets(myFile, sizeof(buffer)/sizeof(buffer[0]), buffer); + UChar *returnedUCharBuffer = u_fgets(buffer, sizeof(buffer)/sizeof(buffer[0]), myFile); if (!returnedCharBuffer && !returnedUCharBuffer) { /* Both returned NULL. stop. */ @@ -575,7 +575,7 @@ static void TestfgetsNewLineHandling() { for (lineIdx = 0; lineIdx < (int32_t)(sizeof(testUStr)/sizeof(testUStr[0])); lineIdx++) { u_memset(buffer, 0xDEAD, sizeof(buffer)/sizeof(buffer[0])); - UChar *returnedUCharBuffer = u_fgets(myFile, sizeof(buffer)/sizeof(buffer[0]), buffer); + UChar *returnedUCharBuffer = u_fgets(buffer, sizeof(buffer)/sizeof(buffer[0]), myFile); if (!returnedUCharBuffer) { /* Returned NULL. stop. */ @@ -591,7 +591,7 @@ static void TestfgetsNewLineHandling() { if (lineIdx != (int32_t)(sizeof(testUStr)/sizeof(testUStr[0]))) { log_err("u_fgets read too much\n"); } - if (u_fgets(myFile, sizeof(buffer)/sizeof(buffer[0]), buffer) != NULL) { + if (u_fgets(buffer, sizeof(buffer)/sizeof(buffer[0]), myFile) != NULL) { log_err("u_file_write wrote too much\n"); } u_fclose(myFile); @@ -739,7 +739,7 @@ static void TestFilePrintCompatibility() { uNumPrinted = u_fprintf(myFile, uFormat, uValue);\ u_fclose(myFile);\ myFile = u_fopen(STANDARD_TEST_FILE, "r", "en_US_POSIX", NULL);\ - u_fgets(myFile, sizeof(uBuffer)/sizeof(*uBuffer), uBuffer);\ + u_fgets(uBuffer, sizeof(uBuffer)/sizeof(*uBuffer), myFile);\ u_fclose(myFile);\ u_austrncpy(compBuffer, uBuffer, sizeof(uBuffer)/sizeof(*uBuffer));\ cNumPrinted = sprintf(buffer, cFormat, cValue);\ @@ -762,7 +762,7 @@ static void TestFilePrintCompatibility() { uNumPrinted = u_fprintf(myFile, format, precision, value);\ u_fclose(myFile);\ myFile = u_fopen(STANDARD_TEST_FILE, "r", "en_US_POSIX", NULL);\ - u_fgets(myFile, sizeof(uBuffer)/sizeof(*uBuffer), uBuffer);\ + u_fgets(uBuffer, sizeof(uBuffer)/sizeof(*uBuffer), myFile);\ u_fclose(myFile);\ u_austrncpy(compBuffer, uBuffer, sizeof(uBuffer)/sizeof(*uBuffer));\ cNumPrinted = sprintf(buffer, format, precision, value);\ @@ -900,7 +900,7 @@ static void TestFprintfFormat() { uNumPrinted = u_fprintf(myFile, "%d % d %d", -1234, 1234, 1234); u_fclose(myFile); myFile = u_fopen(STANDARD_TEST_FILE, "r", "en_US_POSIX", NULL); - u_fgets(myFile, sizeof(uBuffer)/sizeof(*uBuffer), uBuffer); + u_fgets(uBuffer, sizeof(uBuffer)/sizeof(*uBuffer), myFile); u_fclose(myFile); u_austrncpy(compBuffer, uBuffer, sizeof(uBuffer)/sizeof(*uBuffer)); cNumPrinted = sprintf(buffer, "%d % d %d", -1234, 1234, 1234);