ICU-21593 Merge ReadAndConvertFile

This commit is contained in:
Frank Tang 2021-04-22 16:52:48 -07:00 committed by Frank Yung-Fong Tang
parent 581c427557
commit 4689706386
10 changed files with 132 additions and 401 deletions

View file

@ -218,7 +218,7 @@ void DecimalFormatTest::DataDrivenTests() {
}
int32_t len;
UChar *testData = ReadAndConvertFile(srcPath, len, status);
UChar *testData = ReadAndConvertFile(srcPath, len, "utf-8", status);
if (U_FAILURE(status)) {
return; /* something went wrong, error already output */
}
@ -481,94 +481,5 @@ void DecimalFormatTest::execFormatTest(int32_t lineNum,
}
//-------------------------------------------------------------------------------
//
// Read a text data file, convert it from UTF-8 to UChars, and return the data
// in one big UChar * buffer, which the caller must release with delete [].
//
// (Lightly modified version of a similar function in regextst.cpp)
//
//--------------------------------------------------------------------------------
// Reads fileName (opened exactly as given), skips a leading UTF-8 BOM when present,
// converts the bytes from UTF-8 to UTF-16 and returns them in a buffer allocated with
// new UChar[]; the caller releases it with delete [].
//
//   fileName  path handed to fopen().
//   ulen      out: length of the converted text in UChars; 0 on failure.
//   status    in/out ICU error code. Nothing is done if it already indicates failure.
//
// Returns NULL, with a failure status, when the file cannot be opened or read, or when
// the conversion fails.
UChar *DecimalFormatTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,
                                             UErrorCode &status) {
    ulen = 0;
    if (U_FAILURE(status)) {
        return NULL;
    }

    //
    //  Open the file.
    //
    FILE *f = fopen(fileName, "rb");
    if (f == NULL) {
        dataerrln("Error opening test data file %s\n", fileName);
        status = U_FILE_ACCESS_ERROR;
        return NULL;
    }

    //
    //  Determine the size. It is validated BEFORE allocating, because ftell()
    //  reports failure as -1 and an empty file has nothing to convert.
    //
    long rawSize = -1;
    if (fseek(f, 0, SEEK_END) == 0) {
        rawSize = ftell(f);
    }
    if (rawSize <= 0 || rawSize > INT32_MAX || fseek(f, 0, SEEK_SET) != 0) {
        errln("Error reading test data file.");
        fclose(f);
        status = U_FILE_ACCESS_ERROR;   // callers only look at status, so it must report the failure
        return NULL;
    }

    //
    //  Read it in.
    //
    const int32_t fileSize = static_cast<int32_t>(rawSize);
    char *fileBuf = new char[fileSize];
    const int32_t amtRead = static_cast<int32_t>(fread(fileBuf, 1, fileSize, f));
    fclose(f);
    if (amtRead != fileSize) {
        errln("Error reading test data file.");
        delete[] fileBuf;
        status = U_FILE_ACCESS_ERROR;
        return NULL;
    }

    //
    //  Look for a UTF-8 BOM on the data just read.
    //    The test data file is UTF-8.
    //    The BOM needs to be there in the source file to keep the Windows &
    //    EBCDIC machines happy, so complain if it goes missing.
    //    Many Linux editors will silently strip it.
    //
    const char *fileBufNoBOM = fileBuf;
    int32_t amtReadNoBOM = amtRead;
    if (fileSize >= 3 && uprv_strncmp(fileBuf, "\xEF\xBB\xBF", 3) == 0) {
        // Only step past the BOM once we know the buffer really holds one.
        fileBufNoBOM = fileBuf + 3;
        amtReadNoBOM = amtRead - 3;
    } else {
        // TODO: restore this check.
        errln("Test data file %s is missing its BOM", fileName);
    }

    //
    //  Find the length of the input in UTF-16 UChars
    //   (by preflighting the conversion)
    //
    UChar *retPtr = NULL;
    u_strFromUTF8(NULL, 0, &ulen, fileBufNoBOM, amtReadNoBOM, &status);

    //
    //  Convert file contents from UTF-8 to UTF-16
    //
    if (status == U_BUFFER_OVERFLOW_ERROR) {
        // Buffer Overflow is expected from the preflight operation.
        status = U_ZERO_ERROR;
        retPtr = new UChar[ulen+1];
        u_strFromUTF8(retPtr, ulen+1, NULL, fileBufNoBOM, amtReadNoBOM, &status);
    }

    delete[] fileBuf;
    if (U_FAILURE(status)) {
        errln("ICU Error \"%s\"\n", u_errorName(status));
        delete[] retPtr;                // array form: retPtr came from new UChar[]
        retPtr = NULL;
        ulen = 0;
    }
    return retPtr;
}
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */

View file

@ -34,8 +34,6 @@ public:
// The following are test functions that are visible from the intltest test framework.
virtual void DataDrivenTests();
// The following functions are internal to the decimal format tests.
virtual UChar *ReadAndConvertFile(const char *fileName, int32_t &len, UErrorCode &status);
virtual const char *getPath(char buffer[2048], const char *filename);
virtual void execParseTest(int32_t lineNum,
const UnicodeString &inputText,

View file

@ -24,6 +24,7 @@
#include "unicode/uidna.h"
#include "unicode/utf16.h"
#include "idnaconf.h"
#include "charstr.h"
static const UChar C_TAG[] = {0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0}; // =====
static const UChar C_NAMEZONE[] = {0x6E, 0x61, 0x6D, 0x65, 0x7A, 0x6F, 0x6E, 0x65, 0}; // namezone
@ -55,82 +56,6 @@ IdnaConfTest::~IdnaConfTest(){
}
#if !UCONFIG_NO_IDNA
/* this function is modified from RBBITest::ReadAndConvertFile()
*
*/
// Loads "idna_conf.txt" from the directory returned by IntlTest::getSourceTestData(),
// converts it from UTF-8 to UTF-16 and stores the result in the members `base`
// (buffer allocated with new UChar[]) and `len` (length in UChars).
//
// Returns TRUE when `base`/`len` were set; FALSE (after reporting the problem) when the
// path lookup, file I/O or conversion fails. On FALSE `base` and `len` are not modified.
UBool IdnaConfTest::ReadAndConvertFile(){
    UErrorCode status = U_ZERO_ERROR;

    const char *path = IntlTest::getSourceTestData(status);
    if (U_FAILURE(status)) {
        errln("%s", u_errorName(status));
        return FALSE;
    }

    // Build "<path>idna_conf.txt"; the buffer is sized for both parts plus the NUL.
    const char* name = "idna_conf.txt";     // test data file
    const size_t nameBufLen = strlen(path) + strlen(name) + 1;
    char* absolute_name = new char[nameBufLen];
    strcpy(absolute_name, path);
    strcat(absolute_name, name);
    FILE* f = fopen(absolute_name, "rb");
    delete [] absolute_name;
    if (f == NULL){
        dataerrln("fopen error on %s", name);
        return FALSE;
    }

    // Keep the ftell() result signed: it reports failure as -1, which would turn into
    // a huge positive value (and pass a "<= 0" test) if stored directly in a size_t.
    long rawSize = -1;
    if (fseek(f, 0, SEEK_END) == 0) {
        rawSize = ftell(f);
    }
    if (rawSize <= 0 || rawSize > INT32_MAX || fseek(f, 0, SEEK_SET) != 0) {
        errln("Error reading test data file.");
        fclose(f);
        return FALSE;
    }

    // read the test data file to memory
    const size_t source_len = static_cast<size_t>(rawSize);
    char *source = new char[source_len];
    if (fread(source, 1, source_len, f) != source_len) {
        errln("Error reading test data file.");
        delete [] source;
        fclose(f);
        return FALSE;
    }
    fclose(f);

    // convert the UTF-8 encoded stream to UTF-16 stream
    UConverter* conv = ucnv_open("utf-8", &status);
    const int32_t dest_len = ucnv_toUChars(conv,
                                           NULL,           //  dest,
                                           0,              //  destCapacity,
                                           source,
                                           static_cast<int32_t>(source_len),
                                           &status);
    UChar *dest = NULL;
    if (status == U_BUFFER_OVERFLOW_ERROR) {
        // Buffer Overflow is expected from the preflight operation.
        status = U_ZERO_ERROR;
        dest = new UChar[dest_len + 1];
        ucnv_toUChars(conv, dest, dest_len + 1, source, static_cast<int32_t>(source_len), &status);
    }
    delete [] source;
    ucnv_close(conv);

    // Publish the buffer only if the real conversion (not just the preflight) succeeded.
    if (dest == NULL || U_FAILURE(status)) {
        errln("UConverter error: %s", u_errorName(status));
        delete [] dest;
        return FALSE;
    }

    // Do not know the "if possible" behavior of ucnv_toUChars()
    // Do it by ourself.
    dest[dest_len] = 0;
    len = dest_len;
    base = dest;
    return TRUE;    // The buffer is now owned by this object's `base` member.
}
int IdnaConfTest::isNewlineMark(){
static const UChar LF = 0x0a;
@ -280,7 +205,18 @@ void IdnaConfTest::Call(){
}
void IdnaConfTest::Test(void){
if (!ReadAndConvertFile())return;
UErrorCode status = U_ZERO_ERROR;
//
// Open and read the test data file.
//
const char *testDataDirectory = IntlTest::getSourceTestData(status);
CharString testFileName(testDataDirectory, -1, status);
testFileName.append("idna_conf.txt", -1, status);
base = ReadAndConvertFile(testFileName.data(), len, "UTF-8", status);
if (U_FAILURE(status)) {
return;
}
UnicodeString s;
UnicodeString key;

View file

@ -32,7 +32,6 @@ private:
int len ;
int curOffset;
UBool ReadAndConvertFile();
int isNewlineMark();
UBool ReadOneLine(UnicodeString&);

View file

@ -2312,6 +2312,122 @@ const char* IntlTest::getProperty(const char* prop) {
return val;
}
//-------------------------------------------------------------------------------
//
// ReadAndConvertFile Read a text data file, convert it to UChars, and
// return the data in one big UChar * buffer, which the caller must delete.
//
// parameters:
// fileName: the path of the file to read. It is passed to fopen() unchanged, so it
// must include any directory part; no test data directory is prepended.
// ulen an out parameter, receives the actual length (in UChars) of the file data.
// encoding The file encoding. If the file contains a BOM, that will override the encoding
// specified here. The BOM, if it exists, will be stripped from the returned data.
// Pass NULL for the system default encoding.
// status
// returns:
// The file data, converted to UChar.
// The caller must delete this when done with
// delete [] theBuffer;
//
//
//--------------------------------------------------------------------------------
// Implementation notes:
//   - fileName is handed to fopen() exactly as given.
//   - Every failure path (open, seek/size, read, converter open, conversion) leaves
//     status as a failure code, returns NULL and leaves ulen at 0, so a caller that
//     checks U_FAILURE(status) never continues with a NULL buffer.
UChar *IntlTest::ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status) {
    ulen = 0;
    if (U_FAILURE(status)) {
        return NULL;
    }

    //
    //  Open the file.
    //
    FILE *f = fopen(fileName, "rb");
    if (f == NULL) {
        dataerrln("Error opening test data file %s\n", fileName);
        status = U_FILE_ACCESS_ERROR;
        return NULL;
    }

    //
    //  Determine the size. It is validated BEFORE allocating, because ftell()
    //  reports failure as -1 and an empty file has nothing to convert.
    //
    long rawSize = -1;
    if (fseek(f, 0, SEEK_END) == 0) {
        rawSize = ftell(f);
    }
    if (rawSize <= 0 || rawSize > INT32_MAX || fseek(f, 0, SEEK_SET) != 0) {
        errln("Error reading test data file.");
        fclose(f);
        status = U_FILE_ACCESS_ERROR;
        return NULL;
    }

    //
    //  Read it in
    //
    int fileSize = static_cast<int>(rawSize);
    char *fileBuf = new char[fileSize];
    const int amt_read = static_cast<int>(fread(fileBuf, 1, fileSize, f));
    fclose(f);
    if (amt_read != fileSize) {
        errln("Error reading test data file.");
        delete []fileBuf;
        status = U_FILE_ACCESS_ERROR;
        return NULL;
    }

    //
    // Look for a Unicode Signature (BOM) on the data just read.
    // When one is found it is skipped and its encoding replaces the caller's.
    //
    int32_t signatureLength = 0;
    const char *fileBufC = fileBuf;
    const char *bomEncoding = ucnv_detectUnicodeSignature(
        fileBuf, fileSize, &signatureLength, &status);
    if (bomEncoding != NULL) {
        fileBufC += signatureLength;
        fileSize -= signatureLength;
        encoding = bomEncoding;
    }

    //
    // Open a converter to take the file contents to UTF-16
    //
    UChar *retPtr = NULL;
    UConverter *conv = ucnv_open(encoding, &status);
    if (U_SUCCESS(status)) {
        //
        // Convert to UChar.
        //  Preflight first to determine required buffer size.
        //
        ulen = ucnv_toUChars(conv,
                             NULL,           //  dest,
                             0,              //  destCapacity,
                             fileBufC,
                             fileSize,
                             &status);
        if (status == U_BUFFER_OVERFLOW_ERROR) {
            // Buffer Overflow is expected from the preflight operation.
            status = U_ZERO_ERROR;

            retPtr = new UChar[ulen+1];
            ucnv_toUChars(conv,
                          retPtr,       //  dest,
                          ulen+1,
                          fileBufC,
                          fileSize,
                          &status);
        }
    }

    delete []fileBuf;
    ucnv_close(conv);
    if (U_FAILURE(status)) {
        errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
        delete []retPtr;
        retPtr = NULL;
        ulen   = 0;
    }
    return retPtr;
}
/*
* Hey, Emacs, please set the following:
*

View file

@ -416,6 +416,8 @@ public:
virtual const char* getTestDataPath(UErrorCode& err);
static const char* getSourceTestData(UErrorCode& err);
static char *getUnidataPath(char path[]);
UChar *ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status);
// static members
public:

View file

@ -1165,126 +1165,6 @@ void RBBITest::TestDictRules() {
//-------------------------------------------------------------------------------
//
// ReadAndConvertFile Read a text data file, convert it to UChars, and
// return the data in one big UChar * buffer, which the caller must delete.
//
// parameters:
// fileName: the path of the file to read. It is passed to fopen() unchanged, so it
// must include any directory part; no test data directory is prepended.
// ulen an out parameter, receives the actual length (in UChars) of the file data.
// encoding The file encoding. If the file contains a BOM, that will override the encoding
// specified here. The BOM, if it exists, will be stripped from the returned data.
// Pass NULL for the system default encoding.
// status
// returns:
// The file data, converted to UChar.
// The caller must delete this when done with
// delete [] theBuffer;
//
// TODO: This is a clone of RegexTest::ReadAndConvertFile.
// Move this function to some common place.
//
//--------------------------------------------------------------------------------
// Implementation notes:
//   - fileName is handed to fopen() exactly as given.
//   - Every failure path (open, seek/size, read, converter open, conversion) leaves
//     status as a failure code, returns NULL and leaves ulen at 0.
UChar *RBBITest::ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status) {
    ulen = 0;
    if (U_FAILURE(status)) {
        return NULL;
    }

    //
    //  Open the file.
    //
    FILE *f = fopen(fileName, "rb");
    if (f == NULL) {
        dataerrln("Error opening test data file %s\n", fileName);
        status = U_FILE_ACCESS_ERROR;
        return NULL;
    }

    //
    //  Size the file first; ftell() signals failure with -1, and neither that nor
    //  an empty file may reach the allocation below.
    //
    long rawSize = -1;
    if (fseek(f, 0, SEEK_END) == 0) {
        rawSize = ftell(f);
    }
    const bool sizeOk = rawSize > 0 && rawSize <= INT32_MAX && fseek(f, 0, SEEK_SET) == 0;
    if (!sizeOk) {
        errln("Error reading test data file.");
        fclose(f);
        status = U_FILE_ACCESS_ERROR;
        return NULL;
    }

    //
    //  Read it in
    //
    int fileSize = static_cast<int>(rawSize);
    char *fileBuf = new char[fileSize];
    const int amt_read = static_cast<int>(fread(fileBuf, 1, fileSize, f));
    fclose(f);
    if (amt_read != fileSize) {
        errln("Error reading test data file.");
        delete []fileBuf;
        status = U_FILE_ACCESS_ERROR;   // report the short read through status as well
        return NULL;
    }

    //
    // Look for a Unicode Signature (BOM) on the data just read.
    // A detected signature is skipped and overrides the caller-supplied encoding.
    //
    int32_t signatureLength = 0;
    const char *fileBufC = fileBuf;
    const char *bomEncoding = ucnv_detectUnicodeSignature(
        fileBuf, fileSize, &signatureLength, &status);
    if (bomEncoding != NULL) {
        fileBufC += signatureLength;
        fileSize -= signatureLength;
        encoding = bomEncoding;
    }

    //
    // Open a converter to take the rule file to UTF-16
    //
    UChar *retPtr = NULL;
    UConverter *conv = ucnv_open(encoding, &status);
    if (U_SUCCESS(status)) {
        //
        // Convert the rules to UChar.
        //  Preflight first to determine required buffer size.
        //
        ulen = ucnv_toUChars(conv,
                             NULL,           //  dest,
                             0,              //  destCapacity,
                             fileBufC,
                             fileSize,
                             &status);
        if (status == U_BUFFER_OVERFLOW_ERROR) {
            // Buffer Overflow is expected from the preflight operation.
            status = U_ZERO_ERROR;

            retPtr = new UChar[ulen+1];
            ucnv_toUChars(conv,
                          retPtr,       //  dest,
                          ulen+1,
                          fileBufC,
                          fileSize,
                          &status);
        }
    }

    delete []fileBuf;
    ucnv_close(conv);
    if (U_FAILURE(status)) {
        errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
        delete []retPtr;
        retPtr = NULL;
        ulen   = 0;
    }
    return retPtr;
}
//--------------------------------------------------------------------------------------------
//
// Run tests from each of the boundary test data files distributed by the Unicode Consortium

View file

@ -54,7 +54,6 @@ public:
void TestMonkey();
void TestExtended();
UChar *ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status);
void executeTest(TestParams *, UErrorCode &status);
void TestWordBreaks();

View file

@ -3865,115 +3865,6 @@ void RegexTest::Errors() {
}
//-------------------------------------------------------------------------------
//
// Read a text data file, convert it to UChars, and return the data
// in one big UChar * buffer, which the caller must delete.
//
//--------------------------------------------------------------------------------
// Reads fileName (opened exactly as given) and converts it to UTF-16.
//
//   fileName     path handed to fopen().
//   ulen         out: length of the converted text in UChars; 0 on failure.
//   defEncoding  converter name used when the file carries no Unicode signature (BOM).
//                A detected BOM takes precedence and is skipped. A missing BOM on a
//                file declared as "utf-8" is reported as a test error.
//   status       in/out ICU error code. Nothing is done if it already indicates failure.
//
// Returns a buffer allocated with new UChar[] (release with delete []), or NULL with a
// failure status when opening, reading or converting the file fails.
UChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,
                                     const char *defEncoding, UErrorCode &status) {
    ulen = 0;
    if (U_FAILURE(status)) {
        return NULL;
    }

    //
    //  Open the file.
    //
    FILE *f = fopen(fileName, "rb");
    if (f == NULL) {
        dataerrln("Error opening test data file %s\n", fileName);
        status = U_FILE_ACCESS_ERROR;
        return NULL;
    }

    //
    //  Determine the size. It is validated BEFORE allocating, because ftell()
    //  reports failure as -1 and an empty file has nothing to convert.
    //
    long rawSize = -1;
    if (fseek(f, 0, SEEK_END) == 0) {
        rawSize = ftell(f);
    }
    if (rawSize <= 0 || rawSize > INT32_MAX || fseek(f, 0, SEEK_SET) != 0) {
        errln("Error reading test data file.");
        fclose(f);
        status = U_FILE_ACCESS_ERROR;
        return NULL;
    }

    //
    //  Read it in
    //
    int32_t fileSize = static_cast<int32_t>(rawSize);
    char *fileBuf = new char[fileSize];
    const int32_t amt_read = static_cast<int32_t>(fread(fileBuf, 1, fileSize, f));
    fclose(f);
    if (amt_read != fileSize) {
        errln("Error reading test data file.");
        delete[] fileBuf;
        status = U_FILE_ACCESS_ERROR;
        return NULL;
    }

    //
    // Look for a Unicode Signature (BOM) on the data just read
    //
    int32_t signatureLength = 0;
    const char *fileBufC = fileBuf;
    const char *encoding = ucnv_detectUnicodeSignature(
        fileBuf, fileSize, &signatureLength, &status);
    if (encoding != NULL) {
        fileBufC += signatureLength;
        fileSize -= signatureLength;
    } else {
        encoding = defEncoding;
        // Guard the comparison: strcmp() must not be handed a NULL encoding name.
        if (encoding != NULL && strcmp(encoding, "utf-8") == 0) {
            errln("file %s is missing its BOM", fileName);
        }
    }

    //
    // Open a converter to take the rule file to UTF-16
    //
    UChar *retPtr = NULL;
    UConverter *conv = ucnv_open(encoding, &status);
    if (U_SUCCESS(status)) {
        //
        // Convert the rules to UChar.
        //  Preflight first to determine required buffer size.
        //
        ulen = ucnv_toUChars(conv,
                             NULL,           //  dest,
                             0,              //  destCapacity,
                             fileBufC,
                             fileSize,
                             &status);
        if (status == U_BUFFER_OVERFLOW_ERROR) {
            // Buffer Overflow is expected from the preflight operation.
            status = U_ZERO_ERROR;

            retPtr = new UChar[ulen+1];
            ucnv_toUChars(conv,
                          retPtr,       //  dest,
                          ulen+1,
                          fileBufC,
                          fileSize,
                          &status);
        }
    }

    delete[] fileBuf;
    ucnv_close(conv);
    if (U_FAILURE(status)) {
        errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
        delete []retPtr;
        retPtr = NULL;
        ulen   = 0;
    }
    return retPtr;
}
//-------------------------------------------------------------------------------
//
// PerlTests - Run Perl's regular expression tests

View file

@ -71,7 +71,6 @@ public:
const UnicodeString &input, const char *srcPath, int32_t line);
virtual void regex_err(const char *pat, int32_t errline, int32_t errcol,
UErrorCode expectedStatus, int32_t line);
virtual UChar *ReadAndConvertFile(const char *fileName, int32_t &len, const char *charset, UErrorCode &status);
virtual const char *getPath(char buffer[2048], const char *filename);
virtual void TestCase11049(const char *pattern, const char *data, UBool expectMatch, int32_t lineNumber);