From 4689706386a8289755a130fc3d7439ac7cc802d1 Mon Sep 17 00:00:00 2001 From: Frank Tang Date: Thu, 22 Apr 2021 16:52:48 -0700 Subject: [PATCH] ICU-21593 Merge ReadAndConvertFile --- icu4c/source/test/intltest/dcfmtest.cpp | 91 +----------------- icu4c/source/test/intltest/dcfmtest.h | 2 - icu4c/source/test/intltest/idnaconf.cpp | 90 +++--------------- icu4c/source/test/intltest/idnaconf.h | 1 - icu4c/source/test/intltest/intltest.cpp | 116 +++++++++++++++++++++++ icu4c/source/test/intltest/intltest.h | 2 + icu4c/source/test/intltest/rbbitst.cpp | 120 ------------------------ icu4c/source/test/intltest/rbbitst.h | 1 - icu4c/source/test/intltest/regextst.cpp | 109 --------------------- icu4c/source/test/intltest/regextst.h | 1 - 10 files changed, 132 insertions(+), 401 deletions(-) diff --git a/icu4c/source/test/intltest/dcfmtest.cpp b/icu4c/source/test/intltest/dcfmtest.cpp index 0f4c943bf03..13994701620 100644 --- a/icu4c/source/test/intltest/dcfmtest.cpp +++ b/icu4c/source/test/intltest/dcfmtest.cpp @@ -218,7 +218,7 @@ void DecimalFormatTest::DataDrivenTests() { } int32_t len; - UChar *testData = ReadAndConvertFile(srcPath, len, status); + UChar *testData = ReadAndConvertFile(srcPath, len, "utf-8", status); if (U_FAILURE(status)) { return; /* something went wrong, error already output */ } @@ -481,94 +481,5 @@ void DecimalFormatTest::execFormatTest(int32_t lineNum, } -//------------------------------------------------------------------------------- -// -// Read a text data file, convert it from UTF-8 to UChars, and return the data -// in one big UChar * buffer, which the caller must delete. 
-// -// (Lightly modified version of a similar function in regextst.cpp) -// -//-------------------------------------------------------------------------------- -UChar *DecimalFormatTest::ReadAndConvertFile(const char *fileName, int32_t &ulen, - UErrorCode &status) { - UChar *retPtr = NULL; - char *fileBuf = NULL; - const char *fileBufNoBOM = NULL; - FILE *f = NULL; - - ulen = 0; - if (U_FAILURE(status)) { - return retPtr; - } - - // - // Open the file. - // - f = fopen(fileName, "rb"); - if (f == 0) { - dataerrln("Error opening test data file %s\n", fileName); - status = U_FILE_ACCESS_ERROR; - return NULL; - } - // - // Read it in - // - int32_t fileSize; - int32_t amtRead; - int32_t amtReadNoBOM; - - fseek( f, 0, SEEK_END); - fileSize = ftell(f); - fileBuf = new char[fileSize]; - fseek(f, 0, SEEK_SET); - amtRead = static_cast(fread(fileBuf, 1, fileSize, f)); - if (amtRead != fileSize || fileSize <= 0) { - errln("Error reading test data file."); - goto cleanUpAndReturn; - } - - // - // Look for a UTF-8 BOM on the data just read. - // The test data file is UTF-8. - // The BOM needs to be there in the source file to keep the Windows & - // EBCDIC machines happy, so force an error if it goes missing. - // Many Linux editors will silently strip it. - // - fileBufNoBOM = fileBuf + 3; - amtReadNoBOM = amtRead - 3; - if (fileSize<3 || uprv_strncmp(fileBuf, "\xEF\xBB\xBF", 3) != 0) { - // TODO: restore this check. - errln("Test data file %s is missing its BOM", fileName); - fileBufNoBOM = fileBuf; - amtReadNoBOM = amtRead; - } - - // - // Find the length of the input in UTF-16 UChars - // (by preflighting the conversion) - // - u_strFromUTF8(NULL, 0, &ulen, fileBufNoBOM, amtReadNoBOM, &status); - - // - // Convert file contents from UTF-8 to UTF-16 - // - if (status == U_BUFFER_OVERFLOW_ERROR) { - // Buffer Overflow is expected from the preflight operation. 
- status = U_ZERO_ERROR; - retPtr = new UChar[ulen+1]; - u_strFromUTF8(retPtr, ulen+1, NULL, fileBufNoBOM, amtReadNoBOM, &status); - } - -cleanUpAndReturn: - fclose(f); - delete[] fileBuf; - if (U_FAILURE(status)) { - errln("ICU Error \"%s\"\n", u_errorName(status)); - delete retPtr; - retPtr = NULL; - } - return retPtr; -} - #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ diff --git a/icu4c/source/test/intltest/dcfmtest.h b/icu4c/source/test/intltest/dcfmtest.h index e7c43c15891..df5168e322f 100644 --- a/icu4c/source/test/intltest/dcfmtest.h +++ b/icu4c/source/test/intltest/dcfmtest.h @@ -34,8 +34,6 @@ public: // The following are test functions that are visible from the intltest test framework. virtual void DataDrivenTests(); - // The following functions are internal to the decimal format tests. - virtual UChar *ReadAndConvertFile(const char *fileName, int32_t &len, UErrorCode &status); virtual const char *getPath(char buffer[2048], const char *filename); virtual void execParseTest(int32_t lineNum, const UnicodeString &inputText, diff --git a/icu4c/source/test/intltest/idnaconf.cpp b/icu4c/source/test/intltest/idnaconf.cpp index cc8d89a9426..3b5c490883f 100644 --- a/icu4c/source/test/intltest/idnaconf.cpp +++ b/icu4c/source/test/intltest/idnaconf.cpp @@ -24,6 +24,7 @@ #include "unicode/uidna.h" #include "unicode/utf16.h" #include "idnaconf.h" +#include "charstr.h" static const UChar C_TAG[] = {0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0}; // ===== static const UChar C_NAMEZONE[] = {0x6E, 0x61, 0x6D, 0x65, 0x7A, 0x6F, 0x6E, 0x65, 0}; // namezone @@ -55,82 +56,6 @@ IdnaConfTest::~IdnaConfTest(){ } #if !UCONFIG_NO_IDNA -/* this function is modified from RBBITest::ReadAndConvertFile() - * - */ -UBool IdnaConfTest::ReadAndConvertFile(){ - - char * source = NULL; - size_t source_len; - - // read the test data file to memory - FILE* f = NULL; - UErrorCode status = U_ZERO_ERROR; - - const char *path = IntlTest::getSourceTestData(status); - if (U_FAILURE(status)) { - errln("%s", 
u_errorName(status)); - return FALSE; - } - - const char* name = "idna_conf.txt"; // test data file - int t = static_cast(strlen(path) + strlen(name) + 1); - char* absolute_name = new char[t]; - strcpy(absolute_name, path); - strcat(absolute_name, name); - f = fopen(absolute_name, "rb"); - delete [] absolute_name; - - if (f == NULL){ - dataerrln("fopen error on %s", name); - return FALSE; - } - - fseek( f, 0, SEEK_END); - if ((source_len = ftell(f)) <= 0){ - errln("Error reading test data file."); - fclose(f); - return FALSE; - } - - source = new char[source_len]; - fseek(f, 0, SEEK_SET); - if (fread(source, 1, source_len, f) != source_len) { - errln("Error reading test data file."); - delete [] source; - fclose(f); - return FALSE; - } - fclose(f); - - // convert the UTF-8 encoded stream to UTF-16 stream - UConverter* conv = ucnv_open("utf-8", &status); - int dest_len = ucnv_toUChars(conv, - NULL, // dest, - 0, // destCapacity, - source, - static_cast(source_len), - &status); - if (status == U_BUFFER_OVERFLOW_ERROR) { - // Buffer Overflow is expected from the preflight operation. - status = U_ZERO_ERROR; - UChar * dest = NULL; - dest = new UChar[ dest_len + 1]; - ucnv_toUChars(conv, dest, dest_len + 1, source, static_cast(source_len), &status); - // Do not know the "if possible" behavior of ucnv_toUChars() - // Do it by ourself. - dest[dest_len] = 0; - len = dest_len; - base = dest; - delete [] source; - ucnv_close(conv); - return TRUE; // The buffer will owned by caller. - } - errln("UConverter error: %s", u_errorName(status)); - delete [] source; - ucnv_close(conv); - return FALSE; -} int IdnaConfTest::isNewlineMark(){ static const UChar LF = 0x0a; @@ -280,7 +205,18 @@ void IdnaConfTest::Call(){ } void IdnaConfTest::Test(void){ - if (!ReadAndConvertFile())return; + UErrorCode status = U_ZERO_ERROR; + // + // Open and read the test data file. 
+ // + const char *testDataDirectory = IntlTest::getSourceTestData(status); + CharString testFileName(testDataDirectory, -1, status); + testFileName.append("idna_conf.txt", -1, status); + + base = ReadAndConvertFile(testFileName.data(), len, "UTF-8", status); + if (U_FAILURE(status)) { + return; + } UnicodeString s; UnicodeString key; diff --git a/icu4c/source/test/intltest/idnaconf.h b/icu4c/source/test/intltest/idnaconf.h index 956cea6943d..e88291a9af7 100644 --- a/icu4c/source/test/intltest/idnaconf.h +++ b/icu4c/source/test/intltest/idnaconf.h @@ -32,7 +32,6 @@ private: int len ; int curOffset; - UBool ReadAndConvertFile(); int isNewlineMark(); UBool ReadOneLine(UnicodeString&); diff --git a/icu4c/source/test/intltest/intltest.cpp b/icu4c/source/test/intltest/intltest.cpp index e9db879b9e8..aafe7f8ed96 100644 --- a/icu4c/source/test/intltest/intltest.cpp +++ b/icu4c/source/test/intltest/intltest.cpp @@ -2312,6 +2312,122 @@ const char* IntlTest::getProperty(const char* prop) { return val; } +//------------------------------------------------------------------------------- +// +// ReadAndConvertFile Read a text data file, convert it to UChars, and +// return the data in one big UChar * buffer, which the caller must delete. +// +// parameters: +// fileName: the path of the file to open. It is passed to fopen() unchanged, so the +// caller must supply any directory part. +// ulen an out parameter, receives the actual length (in UChars) of the file data. +// encoding The file encoding. If the file contains a BOM, that will override the encoding +// specified here. The BOM, if it exists, will be stripped from the returned data. +// Pass NULL for the system default encoding. +// status +// returns: +// The file data, converted to UChar. 
+// The caller must delete this when done with +// delete [] theBuffer; +// +// +//-------------------------------------------------------------------------------- +UChar *IntlTest::ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status) { + UChar *retPtr = NULL; + char *fileBuf = NULL; + UConverter* conv = NULL; + FILE *f = NULL; + + ulen = 0; + if (U_FAILURE(status)) { + return retPtr; + } + + // + // Open the file. + // + f = fopen(fileName, "rb"); + if (f == 0) { + dataerrln("Error opening test data file %s\n", fileName); + status = U_FILE_ACCESS_ERROR; + return NULL; + } + // + // Read it in + // + int fileSize; + int amt_read; + + fseek( f, 0, SEEK_END); + fileSize = ftell(f); + fileBuf = new char[fileSize]; + fseek(f, 0, SEEK_SET); + amt_read = static_cast<int>(fread(fileBuf, 1, fileSize, f)); + if (amt_read != fileSize || fileSize <= 0) { + errln("Error reading test data file."); + goto cleanUpAndReturn; + } + + // + // Look for a Unicode Signature (BOM) on the data just read + // + int32_t signatureLength; + const char * fileBufC; + const char* bomEncoding; + + fileBufC = fileBuf; + bomEncoding = ucnv_detectUnicodeSignature( + fileBuf, fileSize, &signatureLength, &status); + if(bomEncoding!=NULL ){ + fileBufC += signatureLength; + fileSize -= signatureLength; + encoding = bomEncoding; + } + + // + // Open a converter to take the file data to UTF-16 + // + conv = ucnv_open(encoding, &status); + if (U_FAILURE(status)) { + goto cleanUpAndReturn; + } + + // + // Convert the file data to UChar. + // Preflight first to determine required buffer size. + // + ulen = ucnv_toUChars(conv, + NULL, // dest, + 0, // destCapacity, + fileBufC, + fileSize, + &status); + if (status == U_BUFFER_OVERFLOW_ERROR) { + // Buffer Overflow is expected from the preflight operation. 
+ status = U_ZERO_ERROR; + + retPtr = new UChar[ulen+1]; + ucnv_toUChars(conv, + retPtr, // dest, + ulen+1, + fileBufC, + fileSize, + &status); + } + +cleanUpAndReturn: + fclose(f); + delete []fileBuf; + ucnv_close(conv); + if (U_FAILURE(status)) { + errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status)); + delete []retPtr; + retPtr = 0; + ulen = 0; + } + return retPtr; +} + /* * Hey, Emacs, please set the following: * diff --git a/icu4c/source/test/intltest/intltest.h b/icu4c/source/test/intltest/intltest.h index 1d8146bb4a8..af06cd3cd9a 100644 --- a/icu4c/source/test/intltest/intltest.h +++ b/icu4c/source/test/intltest/intltest.h @@ -416,6 +416,8 @@ public: virtual const char* getTestDataPath(UErrorCode& err); static const char* getSourceTestData(UErrorCode& err); static char *getUnidataPath(char path[]); + UChar *ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status); + // static members public: diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp index b02478c48bf..b0a5773646a 100644 --- a/icu4c/source/test/intltest/rbbitst.cpp +++ b/icu4c/source/test/intltest/rbbitst.cpp @@ -1165,126 +1165,6 @@ void RBBITest::TestDictRules() { -//------------------------------------------------------------------------------- -// -// ReadAndConvertFile Read a text data file, convert it to UChars, and -// return the data in one big UChar * buffer, which the caller must delete. -// -// parameters: -// fileName: the name of the file, with no directory part. The test data directory -// is assumed. -// ulen an out parameter, receives the actual length (in UChars) of the file data. -// encoding The file encoding. If the file contains a BOM, that will override the encoding -// specified here. The BOM, if it exists, will be stripped from the returned data. -// Pass NULL for the system default encoding. -// status -// returns: -// The file data, converted to UChar. 
-// The caller must delete this when done with -// delete [] theBuffer; -// -// TODO: This is a clone of RegexTest::ReadAndConvertFile. -// Move this function to some common place. -// -//-------------------------------------------------------------------------------- -UChar *RBBITest::ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status) { - UChar *retPtr = NULL; - char *fileBuf = NULL; - UConverter* conv = NULL; - FILE *f = NULL; - - ulen = 0; - if (U_FAILURE(status)) { - return retPtr; - } - - // - // Open the file. - // - f = fopen(fileName, "rb"); - if (f == 0) { - dataerrln("Error opening test data file %s\n", fileName); - status = U_FILE_ACCESS_ERROR; - return NULL; - } - // - // Read it in - // - int fileSize; - int amt_read; - - fseek( f, 0, SEEK_END); - fileSize = ftell(f); - fileBuf = new char[fileSize]; - fseek(f, 0, SEEK_SET); - amt_read = static_cast(fread(fileBuf, 1, fileSize, f)); - if (amt_read != fileSize || fileSize <= 0) { - errln("Error reading test data file."); - goto cleanUpAndReturn; - } - - // - // Look for a Unicode Signature (BOM) on the data just read - // - int32_t signatureLength; - const char * fileBufC; - const char* bomEncoding; - - fileBufC = fileBuf; - bomEncoding = ucnv_detectUnicodeSignature( - fileBuf, fileSize, &signatureLength, &status); - if(bomEncoding!=NULL ){ - fileBufC += signatureLength; - fileSize -= signatureLength; - encoding = bomEncoding; - } - - // - // Open a converter to take the rule file to UTF-16 - // - conv = ucnv_open(encoding, &status); - if (U_FAILURE(status)) { - goto cleanUpAndReturn; - } - - // - // Convert the rules to UChar. - // Preflight first to determine required buffer size. - // - ulen = ucnv_toUChars(conv, - NULL, // dest, - 0, // destCapacity, - fileBufC, - fileSize, - &status); - if (status == U_BUFFER_OVERFLOW_ERROR) { - // Buffer Overflow is expected from the preflight operation. 
- status = U_ZERO_ERROR; - - retPtr = new UChar[ulen+1]; - ucnv_toUChars(conv, - retPtr, // dest, - ulen+1, - fileBufC, - fileSize, - &status); - } - -cleanUpAndReturn: - fclose(f); - delete []fileBuf; - ucnv_close(conv); - if (U_FAILURE(status)) { - errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status)); - delete []retPtr; - retPtr = 0; - ulen = 0; - } - return retPtr; -} - - - //-------------------------------------------------------------------------------------------- // // Run tests from each of the boundary test data files distributed by the Unicode Consortium diff --git a/icu4c/source/test/intltest/rbbitst.h b/icu4c/source/test/intltest/rbbitst.h index 754b3e69ea3..821e2a41755 100644 --- a/icu4c/source/test/intltest/rbbitst.h +++ b/icu4c/source/test/intltest/rbbitst.h @@ -54,7 +54,6 @@ public: void TestMonkey(); void TestExtended(); - UChar *ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status); void executeTest(TestParams *, UErrorCode &status); void TestWordBreaks(); diff --git a/icu4c/source/test/intltest/regextst.cpp b/icu4c/source/test/intltest/regextst.cpp index 5f7e36b3ae1..13fb4db5f43 100644 --- a/icu4c/source/test/intltest/regextst.cpp +++ b/icu4c/source/test/intltest/regextst.cpp @@ -3865,115 +3865,6 @@ void RegexTest::Errors() { } - -//------------------------------------------------------------------------------- -// -// Read a text data file, convert it to UChars, and return the data -// in one big UChar * buffer, which the caller must delete. -// -//-------------------------------------------------------------------------------- -UChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen, - const char *defEncoding, UErrorCode &status) { - UChar *retPtr = NULL; - char *fileBuf = NULL; - UConverter* conv = NULL; - FILE *f = NULL; - - ulen = 0; - if (U_FAILURE(status)) { - return retPtr; - } - - // - // Open the file. 
- // - f = fopen(fileName, "rb"); - if (f == 0) { - dataerrln("Error opening test data file %s\n", fileName); - status = U_FILE_ACCESS_ERROR; - return NULL; - } - // - // Read it in - // - int32_t fileSize; - int32_t amt_read; - - fseek( f, 0, SEEK_END); - fileSize = ftell(f); - fileBuf = new char[fileSize]; - fseek(f, 0, SEEK_SET); - amt_read = static_cast(fread(fileBuf, 1, fileSize, f)); - if (amt_read != fileSize || fileSize <= 0) { - errln("Error reading test data file."); - goto cleanUpAndReturn; - } - - // - // Look for a Unicode Signature (BOM) on the data just read - // - int32_t signatureLength; - const char * fileBufC; - const char* encoding; - - fileBufC = fileBuf; - encoding = ucnv_detectUnicodeSignature( - fileBuf, fileSize, &signatureLength, &status); - if(encoding!=NULL ){ - fileBufC += signatureLength; - fileSize -= signatureLength; - } else { - encoding = defEncoding; - if (strcmp(encoding, "utf-8") == 0) { - errln("file %s is missing its BOM", fileName); - } - } - - // - // Open a converter to take the rule file to UTF-16 - // - conv = ucnv_open(encoding, &status); - if (U_FAILURE(status)) { - goto cleanUpAndReturn; - } - - // - // Convert the rules to UChar. - // Preflight first to determine required buffer size. - // - ulen = ucnv_toUChars(conv, - NULL, // dest, - 0, // destCapacity, - fileBufC, - fileSize, - &status); - if (status == U_BUFFER_OVERFLOW_ERROR) { - // Buffer Overflow is expected from the preflight operation. 
- status = U_ZERO_ERROR; - - retPtr = new UChar[ulen+1]; - ucnv_toUChars(conv, - retPtr, // dest, - ulen+1, - fileBufC, - fileSize, - &status); - } - -cleanUpAndReturn: - fclose(f); - delete[] fileBuf; - ucnv_close(conv); - if (U_FAILURE(status)) { - errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status)); - delete []retPtr; - retPtr = 0; - ulen = 0; - } - return retPtr; -} - - //------------------------------------------------------------------------------- // // PerlTests - Run Perl's regular expression tests diff --git a/icu4c/source/test/intltest/regextst.h b/icu4c/source/test/intltest/regextst.h index 46494c568fc..0dc1d7ef6bf 100644 --- a/icu4c/source/test/intltest/regextst.h +++ b/icu4c/source/test/intltest/regextst.h @@ -71,7 +71,6 @@ public: const UnicodeString &input, const char *srcPath, int32_t line); virtual void regex_err(const char *pat, int32_t errline, int32_t errcol, UErrorCode expectedStatus, int32_t line); - virtual UChar *ReadAndConvertFile(const char *fileName, int32_t &len, const char *charset, UErrorCode &status); virtual const char *getPath(char buffer[2048], const char *filename); virtual void TestCase11049(const char *pattern, const char *data, UBool expectMatch, int32_t lineNumber);