ICU-21593 Merge ReadAndConvertFile

2025-04-06 14:05:32 +00:00 · 2021-04-22 16:52:48 -07:00 · 2021-04-22 16:52:48 -07:00 · 4689706386
commit 4689706386
parent 581c427557
10 changed files with 132 additions and 401 deletions
--- a/icu4c/source/test/intltest/dcfmtest.cpp
+++ b/icu4c/source/test/intltest/dcfmtest.cpp
@ -218,7 +218,7 @@ void DecimalFormatTest::DataDrivenTests() {
    }

    int32_t    len;
-    UChar *testData = ReadAndConvertFile(srcPath, len, status);
+    UChar *testData = ReadAndConvertFile(srcPath, len, "utf-8", status);
    if (U_FAILURE(status)) {
        return; /* something went wrong, error already output */
    }
@ -481,94 +481,5 @@ void DecimalFormatTest::execFormatTest(int32_t lineNum,
 }


-//-------------------------------------------------------------------------------
-//      
-//  Read a text data file, convert it from UTF-8 to UChars, and return the data
-//    in one big UChar * buffer, which the caller must delete.
-//
-//    (Lightly modified version of a similar function in regextst.cpp)
-//
-//--------------------------------------------------------------------------------
-UChar *DecimalFormatTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,
-                                     UErrorCode &status) {
-    UChar       *retPtr  = NULL;
-    char        *fileBuf = NULL;
-    const char  *fileBufNoBOM = NULL;
-    FILE        *f       = NULL;
-
-    ulen = 0;
-    if (U_FAILURE(status)) {
-        return retPtr;
-    }
-
-    //
-    //  Open the file.
-    //
-    f = fopen(fileName, "rb");
-    if (f == 0) {
-        dataerrln("Error opening test data file %s\n", fileName);
-        status = U_FILE_ACCESS_ERROR;
-        return NULL;
-    }
-    //
-    //  Read it in
-    //
-    int32_t            fileSize;
-    int32_t            amtRead;
-    int32_t            amtReadNoBOM;
-
-    fseek( f, 0, SEEK_END);
-    fileSize = ftell(f);
-    fileBuf = new char[fileSize];
-    fseek(f, 0, SEEK_SET);
-    amtRead = static_cast<int32_t>(fread(fileBuf, 1, fileSize, f));
-    if (amtRead != fileSize || fileSize <= 0) {
-        errln("Error reading test data file.");
-        goto cleanUpAndReturn;
-    }
-
-    //
-    // Look for a UTF-8 BOM on the data just read.
-    //    The test data file is UTF-8.
-    //    The BOM needs to be there in the source file to keep the Windows & 
-    //    EBCDIC machines happy, so force an error if it goes missing.  
-    //    Many Linux editors will silently strip it.
-    //
-    fileBufNoBOM = fileBuf + 3;
-    amtReadNoBOM = amtRead - 3;
-    if (fileSize<3 || uprv_strncmp(fileBuf, "\xEF\xBB\xBF", 3) != 0) {
-        // TODO:  restore this check.
-        errln("Test data file %s is missing its BOM", fileName);
-        fileBufNoBOM = fileBuf;
-        amtReadNoBOM = amtRead;
-    }
-
-    //
-    // Find the length of the input in UTF-16 UChars
-    //  (by preflighting the conversion)
-    //
-    u_strFromUTF8(NULL, 0, &ulen, fileBufNoBOM, amtReadNoBOM, &status);
-
-    //
-    // Convert file contents from UTF-8 to UTF-16
-    //
-    if (status == U_BUFFER_OVERFLOW_ERROR) {
-        // Buffer Overflow is expected from the preflight operation.
-        status = U_ZERO_ERROR;
-        retPtr = new UChar[ulen+1];
-        u_strFromUTF8(retPtr, ulen+1, NULL, fileBufNoBOM, amtReadNoBOM, &status);
-    }
-
-cleanUpAndReturn:
-    fclose(f);
-    delete[] fileBuf;
-    if (U_FAILURE(status)) {
-        errln("ICU Error \"%s\"\n", u_errorName(status));
-        delete retPtr;
-        retPtr = NULL;
-    }
-    return retPtr;
-}
-
 #endif  /* !UCONFIG_NO_REGULAR_EXPRESSIONS  */

--- a/icu4c/source/test/intltest/dcfmtest.h
+++ b/icu4c/source/test/intltest/dcfmtest.h
@ -34,8 +34,6 @@ public:
    // The following are test functions that are visible from the intltest test framework.
    virtual void DataDrivenTests();

-    // The following functions are internal to the decimal format tests.
-    virtual UChar *ReadAndConvertFile(const char *fileName, int32_t &len, UErrorCode &status);
    virtual const char *getPath(char buffer[2048], const char *filename);
    virtual void execParseTest(int32_t lineNum,
                              const UnicodeString &inputText,
--- a/icu4c/source/test/intltest/idnaconf.cpp
+++ b/icu4c/source/test/intltest/idnaconf.cpp
@ -24,6 +24,7 @@
 #include "unicode/uidna.h"
 #include "unicode/utf16.h"
 #include "idnaconf.h"
+#include "charstr.h"

 static const UChar C_TAG[] = {0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0}; // =====
 static const UChar C_NAMEZONE[] = {0x6E, 0x61, 0x6D, 0x65, 0x7A, 0x6F, 0x6E, 0x65, 0}; // namezone 
@ -55,82 +56,6 @@ IdnaConfTest::~IdnaConfTest(){
 }

 #if !UCONFIG_NO_IDNA
-/* this function is modified from RBBITest::ReadAndConvertFile() 
- *
- */
-UBool IdnaConfTest::ReadAndConvertFile(){
-    
-    char * source = NULL;
-    size_t source_len;
-
-    // read the test data file to memory
-    FILE* f    = NULL;
-    UErrorCode  status  = U_ZERO_ERROR;
-
-    const char *path = IntlTest::getSourceTestData(status);
-    if (U_FAILURE(status)) {
-        errln("%s", u_errorName(status));
-        return FALSE;
-    }
-
-    const char* name = "idna_conf.txt";     // test data file
-    int t = static_cast<int>(strlen(path) + strlen(name) + 1);
-    char* absolute_name = new char[t];
-    strcpy(absolute_name, path);
-    strcat(absolute_name, name);
-    f = fopen(absolute_name, "rb");
-    delete [] absolute_name;
-
-    if (f == NULL){
-        dataerrln("fopen error on %s", name);
-        return FALSE;
-    }
-
-    fseek( f, 0, SEEK_END);
-    if ((source_len = ftell(f)) <= 0){
-        errln("Error reading test data file.");
-        fclose(f);
-        return FALSE;
-    }
-
-    source = new char[source_len];
-    fseek(f, 0, SEEK_SET);
-    if (fread(source, 1, source_len, f) != source_len) {
-        errln("Error reading test data file.");
-        delete [] source;
-        fclose(f);
-        return FALSE;
-    }
-    fclose(f);
-
-    // convert the UTF-8 encoded stream to UTF-16 stream
-    UConverter* conv = ucnv_open("utf-8", &status);
-    int dest_len = ucnv_toUChars(conv,
-                                NULL,           //  dest,
-                                0,              //  destCapacity,
-                                source,
-                                static_cast<int32_t>(source_len),
-                                &status);
-    if (status == U_BUFFER_OVERFLOW_ERROR) {
-        // Buffer Overflow is expected from the preflight operation.
-        status = U_ZERO_ERROR;
-        UChar * dest = NULL;
-        dest = new UChar[ dest_len + 1];
-        ucnv_toUChars(conv, dest, dest_len + 1, source, static_cast<int32_t>(source_len), &status);
-        // Do not know the "if possible" behavior of ucnv_toUChars()
-        // Do it by ourself.
-        dest[dest_len] = 0; 
-        len = dest_len;
-        base = dest;
-        delete [] source;
-        ucnv_close(conv);
-        return TRUE;    // The buffer will owned by caller.
-    }
-    errln("UConverter error: %s", u_errorName(status));
-    delete [] source;
-    ucnv_close(conv);
-    return FALSE;
-}

 int IdnaConfTest::isNewlineMark(){
    static const UChar LF        = 0x0a;
@ -280,7 +205,18 @@ void IdnaConfTest::Call(){
 }

 void IdnaConfTest::Test(void){
-    if (!ReadAndConvertFile())return;
+    UErrorCode  status  = U_ZERO_ERROR;
+    //
+    //  Open and read the test data file.
+    //
+    const char *testDataDirectory = IntlTest::getSourceTestData(status);
+    CharString testFileName(testDataDirectory, -1, status);
+    testFileName.append("idna_conf.txt", -1, status);
+
+    base = ReadAndConvertFile(testFileName.data(), len, "UTF-8", status);
+    if (U_FAILURE(status)) {
+        return;
+    }

    UnicodeString s;
    UnicodeString key;
--- a/icu4c/source/test/intltest/idnaconf.h
+++ b/icu4c/source/test/intltest/idnaconf.h
@ -32,7 +32,6 @@ private:
    int len ;
    int curOffset;

-    UBool  ReadAndConvertFile();
    int isNewlineMark();
    UBool ReadOneLine(UnicodeString&);

--- a/icu4c/source/test/intltest/intltest.cpp
+++ b/icu4c/source/test/intltest/intltest.cpp
@ -2312,6 +2312,122 @@ const char* IntlTest::getProperty(const char* prop) {
    return val;
 }

+//-------------------------------------------------------------------------------
+//
+//    ReadAndConvertFile   Read a text data file, convert it to UChars, and
+//    return the data in one big UChar * buffer, which the caller must delete.
+//
+//    parameters:
+//          fileName:   the path of the file.  It is passed to fopen() unchanged; no test data
+//                      directory is prepended here, so callers pass the complete path.
+//          ulen        an out parameter, receives the actual length (in UChars) of the file data.
+//          encoding    The file encoding.  If the file contains a BOM, that will override the encoding
+//                      specified here.  The BOM, if it exists, will be stripped from the returned data.
+//                      Pass NULL for the system default encoding.
+//          status      in/out error code.  Set to U_FILE_ACCESS_ERROR when the file cannot be
+//                      opened, is empty, or cannot be read completely.
+//    returns:
+//                      The file data, converted to UChar.
+//                      The caller must delete this when done with
+//                           delete [] theBuffer;
+//                      NULL (with ulen set to 0) when status indicates a failure.
+//--------------------------------------------------------------------------------
+UChar *IntlTest::ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status) {
+    UChar       *retPtr  = NULL;
+    char        *fileBuf = NULL;
+    UConverter* conv     = NULL;
+    FILE        *f       = NULL;
+
+    ulen = 0;
+    if (U_FAILURE(status)) {
+        return retPtr;
+    }
+
+    //
+    //  Open the file.
+    //
+    f = fopen(fileName, "rb");
+    if (f == 0) {
+        dataerrln("Error opening test data file %s\n", fileName);
+        status = U_FILE_ACCESS_ERROR;
+        return NULL;
+    }
+    //
+    //  Read it in.  ftell() reports failure as -1, so the size is checked before
+    //  it is used to allocate the buffer.
+    //
+    int   fileSize;
+    int   amt_read = 0;
+
+    fseek( f, 0, SEEK_END);
+    fileSize = static_cast<int>(ftell(f));
+    fseek(f, 0, SEEK_SET);
+    if (fileSize > 0) {
+        fileBuf = new char[fileSize];
+        amt_read = static_cast<int>(fread(fileBuf, 1, fileSize, f));
+    }
+    if (fileSize <= 0 || amt_read != fileSize) {
+        // Fail through status as well, so callers that test only U_FAILURE(status)
+        // do not continue with a NULL buffer.
+        errln("Error reading test data file.");
+        status = U_FILE_ACCESS_ERROR;
+        goto cleanUpAndReturn;
+    }
+
+    //
+    // Look for a Unicode Signature (BOM) on the data just read
+    //
+    int32_t        signatureLength;
+    const char *   fileBufC;
+    const char*    bomEncoding;
+
+    fileBufC = fileBuf;
+    bomEncoding = ucnv_detectUnicodeSignature(
+        fileBuf, fileSize, &signatureLength, &status);
+    if(bomEncoding!=NULL ){
+        fileBufC  += signatureLength;
+        fileSize  -= signatureLength;
+        encoding = bomEncoding;
+    }
+
+    //
+    // Open a converter to take the file data to UTF-16
+    //
+    conv = ucnv_open(encoding, &status);
+    if (U_FAILURE(status)) {
+        goto cleanUpAndReturn;
+    }
+
+    //
+    // Convert the file data to UChar.
+    //  Preflight first to determine required buffer size.
+    //
+    ulen = ucnv_toUChars(conv,
+        NULL, 0,        //  dest, destCapacity: preflight only
+        fileBufC, fileSize, &status);
+    if (status == U_BUFFER_OVERFLOW_ERROR || U_SUCCESS(status)) {
+        // Buffer Overflow is expected from the preflight operation.  A success code here
+        // means the preflight found nothing to convert (e.g. no data after the BOM).
+        status = U_ZERO_ERROR;
+
+        retPtr = new UChar[ulen+1];
+        ucnv_toUChars(conv, retPtr, ulen+1,
+            fileBufC, fileSize, &status);
+    }
+
+cleanUpAndReturn:
+    fclose(f);
+    delete []fileBuf;
+    ucnv_close(conv);
+    if (U_FAILURE(status)) {
+        errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
+        delete []retPtr;
+        retPtr = 0;
+        ulen   = 0;
+    }
+    return retPtr;
+}
+
 /*
 * Hey, Emacs, please set the following:
 *
--- a/icu4c/source/test/intltest/intltest.h
+++ b/icu4c/source/test/intltest/intltest.h
@ -416,6 +416,8 @@ public:
    virtual const char* getTestDataPath(UErrorCode& err);
    static const char* getSourceTestData(UErrorCode& err);
    static char *getUnidataPath(char path[]);
+    UChar *ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status);
+

 // static members
 public:
--- a/icu4c/source/test/intltest/rbbitst.cpp
+++ b/icu4c/source/test/intltest/rbbitst.cpp
@ -1165,126 +1165,6 @@ void RBBITest::TestDictRules() {



-//-------------------------------------------------------------------------------
-//
-//    ReadAndConvertFile   Read a text data file, convert it to UChars, and
-//    return the data in one big UChar * buffer, which the caller must delete.
-//
-//    parameters:
-//          fileName:   the name of the file, with no directory part.  The test data directory
-//                      is assumed.
-//          ulen        an out parameter, receives the actual length (in UChars) of the file data.
-//          encoding    The file encoding.  If the file contains a BOM, that will override the encoding
-//                      specified here.  The BOM, if it exists, will be stripped from the returned data.
-//                      Pass NULL for the system default encoding.
-//          status
-//    returns:
-//                      The file data, converted to UChar.
-//                      The caller must delete this when done with
-//                           delete [] theBuffer;
-//
-//    TODO:  This is a clone of RegexTest::ReadAndConvertFile.
-//           Move this function to some common place.
-//
-//--------------------------------------------------------------------------------
-UChar *RBBITest::ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status) {
-    UChar       *retPtr  = NULL;
-    char        *fileBuf = NULL;
-    UConverter* conv     = NULL;
-    FILE        *f       = NULL;
-
-    ulen = 0;
-    if (U_FAILURE(status)) {
-        return retPtr;
-    }
-
-    //
-    //  Open the file.
-    //
-    f = fopen(fileName, "rb");
-    if (f == 0) {
-        dataerrln("Error opening test data file %s\n", fileName);
-        status = U_FILE_ACCESS_ERROR;
-        return NULL;
-    }
-    //
-    //  Read it in
-    //
-    int   fileSize;
-    int   amt_read;
-
-    fseek( f, 0, SEEK_END);
-    fileSize = ftell(f);
-    fileBuf = new char[fileSize];
-    fseek(f, 0, SEEK_SET);
-    amt_read = static_cast<int>(fread(fileBuf, 1, fileSize, f));
-    if (amt_read != fileSize || fileSize <= 0) {
-        errln("Error reading test data file.");
-        goto cleanUpAndReturn;
-    }
-
-    //
-    // Look for a Unicode Signature (BOM) on the data just read
-    //
-    int32_t        signatureLength;
-    const char *   fileBufC;
-    const char*    bomEncoding;
-
-    fileBufC = fileBuf;
-    bomEncoding = ucnv_detectUnicodeSignature(
-        fileBuf, fileSize, &signatureLength, &status);
-    if(bomEncoding!=NULL ){
-        fileBufC  += signatureLength;
-        fileSize  -= signatureLength;
-        encoding = bomEncoding;
-    }
-
-    //
-    // Open a converter to take the rule file to UTF-16
-    //
-    conv = ucnv_open(encoding, &status);
-    if (U_FAILURE(status)) {
-        goto cleanUpAndReturn;
-    }
-
-    //
-    // Convert the rules to UChar.
-    //  Preflight first to determine required buffer size.
-    //
-    ulen = ucnv_toUChars(conv,
-        NULL,           //  dest,
-        0,              //  destCapacity,
-        fileBufC,
-        fileSize,
-        &status);
-    if (status == U_BUFFER_OVERFLOW_ERROR) {
-        // Buffer Overflow is expected from the preflight operation.
-        status = U_ZERO_ERROR;
-
-        retPtr = new UChar[ulen+1];
-        ucnv_toUChars(conv,
-            retPtr,       //  dest,
-            ulen+1,
-            fileBufC,
-            fileSize,
-            &status);
-    }
-
-cleanUpAndReturn:
-    fclose(f);
-    delete []fileBuf;
-    ucnv_close(conv);
-    if (U_FAILURE(status)) {
-        errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
-        delete []retPtr;
-        retPtr = 0;
-        ulen   = 0;
-    }
-    return retPtr;
-}
-
-
-
 //--------------------------------------------------------------------------------------------
 //
 //   Run tests from each of the boundary test data files distributed by the Unicode Consortium
--- a/icu4c/source/test/intltest/rbbitst.h
+++ b/icu4c/source/test/intltest/rbbitst.h
@ -54,7 +54,6 @@ public:
    void TestMonkey();

    void TestExtended();
-    UChar *ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status);
    void executeTest(TestParams *, UErrorCode &status);

    void TestWordBreaks();
--- a/icu4c/source/test/intltest/regextst.cpp
+++ b/icu4c/source/test/intltest/regextst.cpp
@ -3865,115 +3865,6 @@ void RegexTest::Errors() {

 }

-
-//-------------------------------------------------------------------------------
-//
-//  Read a text data file, convert it to UChars, and return the data
-//    in one big UChar * buffer, which the caller must delete.
-//
-//--------------------------------------------------------------------------------
-UChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,
-                                     const char *defEncoding, UErrorCode &status) {
-    UChar       *retPtr  = NULL;
-    char        *fileBuf = NULL;
-    UConverter* conv     = NULL;
-    FILE        *f       = NULL;
-
-    ulen = 0;
-    if (U_FAILURE(status)) {
-        return retPtr;
-    }
-
-    //
-    //  Open the file.
-    //
-    f = fopen(fileName, "rb");
-    if (f == 0) {
-        dataerrln("Error opening test data file %s\n", fileName);
-        status = U_FILE_ACCESS_ERROR;
-        return NULL;
-    }
-    //
-    //  Read it in
-    //
-    int32_t            fileSize;
-    int32_t            amt_read;
-
-    fseek( f, 0, SEEK_END);
-    fileSize = ftell(f);
-    fileBuf = new char[fileSize];
-    fseek(f, 0, SEEK_SET);
-    amt_read = static_cast<int32_t>(fread(fileBuf, 1, fileSize, f));
-    if (amt_read != fileSize || fileSize <= 0) {
-        errln("Error reading test data file.");
-        goto cleanUpAndReturn;
-    }
-
-    //
-    // Look for a Unicode Signature (BOM) on the data just read
-    //
-    int32_t        signatureLength;
-    const char *   fileBufC;
-    const char*    encoding;
-
-    fileBufC = fileBuf;
-    encoding = ucnv_detectUnicodeSignature(
-        fileBuf, fileSize, &signatureLength, &status);
-    if(encoding!=NULL ){
-        fileBufC  += signatureLength;
-        fileSize  -= signatureLength;
-    } else {
-        encoding = defEncoding;
-        if (strcmp(encoding, "utf-8") == 0) {
-            errln("file %s is missing its BOM", fileName);
-        }
-    }
-
-    //
-    // Open a converter to take the rule file to UTF-16
-    //
-    conv = ucnv_open(encoding, &status);
-    if (U_FAILURE(status)) {
-        goto cleanUpAndReturn;
-    }
-
-    //
-    // Convert the rules to UChar.
-    //  Preflight first to determine required buffer size.
-    //
-    ulen = ucnv_toUChars(conv,
-        NULL,           //  dest,
-        0,              //  destCapacity,
-        fileBufC,
-        fileSize,
-        &status);
-    if (status == U_BUFFER_OVERFLOW_ERROR) {
-        // Buffer Overflow is expected from the preflight operation.
-        status = U_ZERO_ERROR;
-
-        retPtr = new UChar[ulen+1];
-        ucnv_toUChars(conv,
-            retPtr,       //  dest,
-            ulen+1,
-            fileBufC,
-            fileSize,
-            &status);
-    }
-
-cleanUpAndReturn:
-    fclose(f);
-    delete[] fileBuf;
-    ucnv_close(conv);
-    if (U_FAILURE(status)) {
-        errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
-        delete []retPtr;
-        retPtr = 0;
-        ulen   = 0;
-    }
-    return retPtr;
-}
-
-
 //-------------------------------------------------------------------------------
 //
 //   PerlTests  - Run Perl's regular expression tests
--- a/icu4c/source/test/intltest/regextst.h
+++ b/icu4c/source/test/intltest/regextst.h
@ -71,7 +71,6 @@ public:
                            const UnicodeString &input, const char *srcPath, int32_t line);
    virtual void regex_err(const char *pat, int32_t errline, int32_t errcol,
                            UErrorCode expectedStatus, int32_t line);
-    virtual UChar *ReadAndConvertFile(const char *fileName, int32_t &len, const char *charset, UErrorCode &status);
    virtual const char *getPath(char buffer[2048], const char *filename);

    virtual void TestCase11049(const char *pattern, const char *data, UBool expectMatch, int32_t lineNumber);