ICU-2334 initial checkin to support test file driven tests

X-SVN-Rev: 14752
This commit is contained in:
Alan Liu 2004-03-25 02:19:11 +00:00
parent 25e9c193f1
commit 7b67501395
5 changed files with 440 additions and 0 deletions

View file

@ -646,6 +646,18 @@
<File
RelativePath=".\testutil.h">
</File>
<File
RelativePath=".\textfile.cpp">
</File>
<File
RelativePath=".\textfile.h">
</File>
<File
RelativePath=".\tokiter.cpp">
</File>
<File
RelativePath=".\tokiter.h">
</File>
</Filter>
<Filter
Name="normalization"

View file

@ -0,0 +1,177 @@
/*
**********************************************************************
* Copyright (c) 2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: March 19 2004
* Since: ICU 3.0
**********************************************************************
*/
#include "textfile.h"
#include "cmemory.h"
#include "cstring.h"
#include "intltest.h"
#include "util.h"
// If the symbol CCP is defined, then the 'name' and 'encoding'
// constructor parameters are copied. Otherwise they are aliased.
// #define CCP
/**
 * Open the file '_name' for binary reading.
 *
 * The path is derived from the string returned by
 * IntlTest::loadTestData(): everything after its last file separator
 * is dropped, and ".." <separator> '_name' is appended.
 *
 * @param _name     file name, relative to the directory described above;
 *                  must not be null
 * @param _encoding codepage name later used by readLine() to convert
 *                  bytes to Unicode; must not be null
 * @param ec        input-output error code.  Set to
 *                  U_ILLEGAL_ARGUMENT_ERROR if an argument is null or
 *                  the file cannot be opened, and to
 *                  U_MEMORY_ALLOCATION_ERROR if a buffer cannot be
 *                  allocated.  Nothing is done if it already indicates
 *                  a failure.
 */
TextFile::TextFile(const char* _name, const char* _encoding, UErrorCode& ec) :
    // Listed in declaration order, which is the order in which the
    // members are actually initialized.
    name(0), encoding(0),
    lineNo(0),
    buffer(0),
    capacity(0),
    file(0)
{
    if (U_FAILURE(ec)) {
        return;
    }
    if (_name == 0 || _encoding == 0) {
        ec = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

#ifdef CCP
    // Copy mode: keep private copies of both strings.  uprv_malloc()
    // returns void*, which C++ does not convert implicitly.
    name = (char*) uprv_malloc(uprv_strlen(_name) + 1);
    encoding = (char*) uprv_malloc(uprv_strlen(_encoding) + 1);
    if (name == 0 || encoding == 0) {
        ec = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    uprv_strcpy(name, _name);
    uprv_strcpy(encoding, _encoding);
#else
    // Alias mode: the caller's strings must outlive this object.
    name = (char*) _name;
    encoding = (char*) _encoding;
#endif

    const char* testDir = IntlTest::loadTestData(ec);
    if (U_FAILURE(ec)) {
        return;
    }

    // Room for testDir, "..", a separator, the name and the
    // terminating NUL, with a little slack.
    int32_t pathCapacity =
        (int32_t) (uprv_strlen(testDir) + uprv_strlen(name) + 8);
    if (!ensureCapacity(pathCapacity)) {
        ec = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // Keep testDir up to and including its last separator, then step
    // up one directory and append the file name.
    uprv_strcpy(buffer, testDir);
    char* index = uprv_strrchr(buffer, (char)U_FILE_SEP_CHAR);
    if (index) {
        index[1] = 0;
    }
    uprv_strcat(buffer, ".." U_FILE_SEP_STRING);
    uprv_strcat(buffer, name);

    file = T_FileStream_open(buffer, "rb");
    if (file == 0) {
        ec = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
}
/**
 * Release the line buffer and close the underlying file stream, if
 * they exist.  In copy mode (CCP defined) the private copies of the
 * name and encoding strings are freed as well.
 */
TextFile::~TextFile() {
    if (buffer != 0) {
        uprv_free(buffer);
    }
    if (file != 0) {
        T_FileStream_close(file);
    }
#ifdef CCP
    uprv_free(encoding);
    uprv_free(name);
#endif
}
/**
 * Read the next line of the file into 'line', converting its bytes to
 * Unicode with this file's encoding.  A line ends at 0xA, at 0xD, at
 * 0xD followed by 0xA, or at the end of the file; the terminator is
 * not stored.
 *
 * @param line receives the converted line on success
 * @param ec   input-output error code; set to
 *             U_MEMORY_ALLOCATION_ERROR if the line buffer cannot grow
 * @return TRUE if a line was read; FALSE if 'ec' already indicated a
 *         failure, the file was never opened, the end of the file was
 *         reached, or an error occurred
 */
UBool TextFile::readLine(UnicodeString& line, UErrorCode& ec) {
    // A failed constructor leaves 'file' null; do not touch the stream.
    if (U_FAILURE(ec) || file == 0) {
        return FALSE;
    }
    if (T_FileStream_eof(file)) {
        return FALSE;
    }
    // Note: 'buffer' may change after ensureCapacity() is called, so
    // always address it as buffer[i], never through a cached pointer.
    int32_t n = 0;
    for (;;) {
        int c = T_FileStream_getc(file); // sic: int, not int32_t
        if (c < 0) {
            // The EOF flag is only raised by a read that hits the end,
            // so the eof() test above can pass with nothing left to
            // read.  Do not report that as an extra empty line.
            if (n == 0) {
                return FALSE;
            }
            break;
        }
        if (c == 0xA) {
            break;
        }
        if (c == 0xD) {
            // consume 0xA following 0xD
            c = T_FileStream_getc(file);
            if (c != 0xA && c >= 0) {
                T_FileStream_ungetc(c, file);
            }
            break;
        }
        if (!setBuffer(n++, (char) c, ec)) {
            return FALSE;
        }
    }
    // NUL-terminate so the bytes can be handed over as a C string.
    if (!setBuffer(n, 0, ec)) {
        return FALSE;
    }
    line = UnicodeString(buffer, encoding);
    ++lineNo;
    return TRUE;
}
/**
 * Read lines until one is found that is neither blank nor a comment.
 * A comment line is one whose first non-white-space character is '#'.
 *
 * @param line receives the line that was found
 * @param ec   input-output error code, passed through to readLine()
 * @param trim if TRUE, the leading white space of the returned line
 *             is removed
 * @return TRUE if such a line was found, FALSE once readLine() fails
 */
UBool TextFile::readLineSkippingComments(UnicodeString& line, UErrorCode& ec,
                                         UBool trim) {
    while (readLine(line, ec)) {
        // Locate the first character that is not white space.
        int32_t start = 0;
        ICU_Utility::skipWhitespace(line, start, TRUE);

        const UBool isBlank = (start == line.length());
        if (isBlank || line.charAt(start) == 0x23/*'#'*/) {
            // Nothing of interest on this line; try the next one.
            continue;
        }

        if (trim) {
            line.remove(0, start);
        }
        return TRUE;
    }
    return FALSE;
}
/**
 * Store 'c' at buffer[index], enlarging the buffer first when 'index'
 * lies beyond its current capacity.
 *
 * @param index position to write to
 * @param c     byte to store
 * @param ec    set to U_MEMORY_ALLOCATION_ERROR if the buffer cannot
 *              be enlarged
 * @return TRUE if the byte was stored, FALSE on allocation failure
 */
UBool TextFile::setBuffer(int32_t index, char c, UErrorCode& ec) {
    const UBool mustGrow = (index >= capacity);
    if (mustGrow && !ensureCapacity(index+1)) {
        ec = U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }
    buffer[index] = c;
    return TRUE;
}
/**
 * Make sure that 'buffer' has at least 'mincapacity' bytes.
 * Upon return, 'buffer' may have changed value.  The first
 * 'capacity' bytes of the previous buffer are preserved verbatim,
 * including any NUL bytes among them.
 *
 * @param mincapacity the number of bytes required
 * @return TRUE upon success, FALSE if memory could not be allocated;
 *         on failure 'buffer' and 'capacity' are left unchanged
 */
UBool TextFile::ensureCapacity(int32_t mincapacity) {
    if (capacity >= mincapacity) {
        return TRUE;
    }

    // Grow by a factor of 2 to prevent frequent allocation, starting
    // from the current capacity, or from 1024 when nothing has been
    // allocated yet.  (Note that "capacity || 1024" would not do this:
    // in C++ it is a boolean expression whose value is 1.)
    int32_t newcapacity = (capacity > 0) ? capacity : 1024;
    while (newcapacity < mincapacity) {
        if (newcapacity > 0x7FFFFFFF / 2) {
            // Doubling would overflow; clamp instead.
            newcapacity = 0x7FFFFFFF;
            break;
        }
        newcapacity *= 2;
    }

    // Allocate a fresh block and copy the old contents across.
    // Note: 'buffer' may be 0.
    char* newbuffer = (char*) uprv_malloc(newcapacity);
    if (newbuffer == 0) {
        return FALSE;
    }
    if (buffer != 0) {
        // Raw byte copy: the contents are not necessarily
        // NUL-terminated and may contain NUL bytes, so a string copy
        // could stop early.
        uprv_memcpy(newbuffer, buffer, capacity);
        uprv_free(buffer);
    }
    buffer = newbuffer;
    capacity = newcapacity;
    return TRUE;
}

View file

@ -0,0 +1,73 @@
/*
**********************************************************************
* Copyright (c) 2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: March 19 2004
* Since: ICU 3.0
**********************************************************************
*/
#ifndef __ICU_INTLTEST_TEXTFILE__
#define __ICU_INTLTEST_TEXTFILE__
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "filestrm.h"
/**
 * This class implements access to a text data file located in the
 * icu/source/test/testdata/ directory.
 *
 * An instance owns its file stream and its line buffer, so it cannot
 * be copied.
 */
class TextFile {
public:
    /**
     * Open a file with the given name, in the given encoding, in the
     * ICU testdata directory.  See textfile.cpp to determine if the
     * 'name' and 'encoding' parameters are aliased or copied.
     * @param name     the file name
     * @param encoding the encoding of the file's contents
     * @param ec       input-output error code; the object must not be
     *                 read from if this indicates a failure
     */
    TextFile(const char* name, const char* encoding, UErrorCode& ec);

    virtual ~TextFile();

    /**
     * Read a line terminated by ^J or ^M or ^M^J, and convert it from
     * this file's encoding to Unicode.  The EOL character(s) are not
     * included in 'line'.
     * @param line receives the line
     * @param ec   input-output error code
     * @return TRUE if a line was read, or FALSE if the EOF
     * was reached or an error occurred
     */
    UBool readLine(UnicodeString& line, UErrorCode& ec);

    /**
     * Read a line, ignoring blank lines and lines that start with
     * '#'.  Leading white space is removed only on request.
     * @param line receives the line
     * @param ec   input-output error code
     * @param trim if TRUE then remove leading rule white space;
     * the default, FALSE, leaves the line as read
     * @return TRUE if a line was read, or FALSE if the EOF
     * was reached or an error occurred
     */
    UBool readLineSkippingComments(UnicodeString& line, UErrorCode& ec,
                                   UBool trim = FALSE);

    /**
     * Return the line number of the last line returned by readLine().
     * The value is 0 until a line has been read.
     */
    inline int32_t getLineNumber() const;

private:
    // Copying is disabled: a copy would share 'buffer' and 'file' with
    // the original, and both destructors would then release them.
    // These two members are declared but intentionally not implemented.
    TextFile(const TextFile& other);
    TextFile& operator=(const TextFile& other);

    /** Grow 'buffer' so that it holds at least 'capacity' bytes. */
    UBool ensureCapacity(int32_t capacity);

    /** Set buffer[index] to c, growing 'buffer' if necessary. */
    UBool setBuffer(int32_t index, char c, UErrorCode& ec);

    char* name;       // file name; aliased or copied, see textfile.cpp
    char* encoding;   // encoding name; aliased or copied, see textfile.cpp
    int32_t lineNo;   // number of lines returned so far
    char* buffer;     // scratch buffer for the path and for line bytes
    int32_t capacity; // allocated size of 'buffer', in bytes
    FileStream* file; // the open stream, or 0 if opening failed
};
// Out-of-class definition of the accessor declared inline in TextFile.
// Returns the lineNo member unchanged.
inline int32_t TextFile::getLineNumber() const {
return lineNo;
}
#endif

View file

@ -0,0 +1,107 @@
/*
**********************************************************************
* Copyright (c) 2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: March 22 2004
* Since: ICU 3.0
**********************************************************************
*/
#include "tokiter.h"
#include "textfile.h"
#include "util.h"
#include "uprops.h"
/**
 * Create an iterator that draws its lines from 'r'.  The pointer is
 * stored as-is.  No line is read until next() is called; until then
 * both position fields hold -1.
 */
TokenIterator::TokenIterator(TextFile* r) :
    reader(r),
    done(FALSE),
    haveLine(FALSE),
    pos(-1),
    lastpos(-1)
{
}
// Nothing to release here: the body is empty, so the TextFile that
// 'reader' points to is left untouched.
TokenIterator::~TokenIterator() {
}
/**
 * Fetch the next token, reading further lines from the TextFile as
 * each one is used up.
 *
 * @param token cleared, then filled with the token text
 * @param ec    input-output error code
 * @return TRUE if a token was produced; FALSE if the input is
 *         exhausted, 'ec' indicated a failure on entry, or an error
 *         occurred while reading or tokenizing
 */
UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
    if (U_FAILURE(ec) || done) {
        return FALSE;
    }
    token.truncate(0);
    do {
        if (!haveLine) {
            // The current line is used up (or none was read yet).
            const UBool gotLine = reader->readLineSkippingComments(line, ec);
            if (!gotLine) {
                done = TRUE;
                return FALSE;
            }
            haveLine = TRUE;
            pos = 0;
        }
        // Remember where this scan starts.
        lastpos = pos;
        if (nextToken(token, ec)) {
            return TRUE;
        }
        // No token left on this line; move on unless an error occurred.
        haveLine = FALSE;
    } while (U_SUCCESS(ec));
    return FALSE;
}
// Forward to the underlying TextFile and return the line number it
// reports.  'reader' is dereferenced without a null check.
int32_t TokenIterator::getLineNumber() const {
return reader->getLineNumber();
}
/**
 * Read the next token from 'this->line', starting at 'this->pos', and
 * append it to 'token'.  Tokens are separated by rule white space.
 * Tokens may also be delimited by double or single quotes; the closing
 * quote must match the opening quote.  Backslash escapes are decoded
 * with UnicodeString::unescapeAt(), both inside and outside quotes,
 * and an escaped character never acts as a quote, separator or
 * comment marker.  If an unquoted, unescaped '#' is encountered, the
 * rest of the line is ignored.
 *
 * @param token the token is appended to this string
 * @param ec input-output error code; set to
 * U_MALFORMED_UNICODE_ESCAPE for an invalid escape and to
 * U_UNTERMINATED_QUOTE if the line ends inside a quoted token
 * @return TRUE if a valid token is found, or FALSE if the end
 * of the line is reached or an error occurs
 */
UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
    ICU_Utility::skipWhitespace(line, pos, TRUE);
    if (pos == line.length()) {
        return FALSE;
    }
    UChar c = line.charAt(pos++);
    UChar quote = 0;
    switch (c) {
    case 34/*'"'*/:
    case 39/*'\''*/:
        quote = c;
        break;
    case 35/*'#'*/:
        return FALSE;
    case 92/*'\\'*/:
        // A token may begin with an escape.  Step back onto the
        // backslash so that the loop below decodes it.
        --pos;
        break;
    default:
        token.append(c);
        break;
    }
    while (pos < line.length()) {
        c = line.charAt(pos); // 16-bit ok
        if (c == 92/*'\\'*/) {
            // unescapeAt() expects the offset of the character that
            // follows the backslash, and advances it past the escape.
            ++pos;
            UChar32 c32 = line.unescapeAt(pos);
            if (c32 < 0) {
                ec = U_MALFORMED_UNICODE_ESCAPE;
                return FALSE;
            }
            token.append(c32);
        } else if ((quote != 0 && c == quote) ||
                   (quote == 0 && uprv_isRuleWhiteSpace(c))) {
            ++pos;
            return TRUE;
        } else if (quote == 0 && c == 35/*'#'*/) {
            // Compare against the code point, as above; a char literal
            // has a different value on non-ASCII platforms.
            return TRUE; // do NOT increment
        } else {
            token.append(c);
            ++pos;
        }
    }
    if (quote != 0) {
        ec = U_UNTERMINATED_QUOTE;
        return FALSE;
    }
    return TRUE;
}

View file

@ -0,0 +1,71 @@
/*
**********************************************************************
* Copyright (c) 2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: March 22 2004
* Since: ICU 3.0
**********************************************************************
*/
#ifndef __ICU_INTLTEST_TOKITER__
#define __ICU_INTLTEST_TOKITER__
#include "unicode/utypes.h"
#include "unicode/unistr.h"
class TextFile;
/**
 * An iterator class that returns successive string tokens from some
 * source. String tokens are, in general, separated by rule white
 * space in the source text. Furthermore, they may be delimited by
 * either single or double quotes (opening and closing quotes must
 * match). Escapes are processed using standard ICU unescaping.
 */
class TokenIterator {
public:
/**
* Construct an iterator over the tokens returned by the given
* TextFile, ignoring blank lines and comment lines (first
* non-blank character is '#'). Note that trailing comments on a
* line, beginning with the first unquoted '#', are recognized.
* @param r the TextFile to read lines from; it is aliased, not
* copied
*/
TokenIterator(TextFile* r);
virtual ~TokenIterator();
/**
* Return the next token from this iterator.
* @param token receives the token text
* @param ec input-output error code
* @return TRUE if a token was read, or FALSE if no more tokens
* are available or an error occurred.
*/
UBool next(UnicodeString& token, UErrorCode& ec);
/**
* Return the one-based line number of the line of the last token
* returned by next(). Should only be called after a call to
* next(); otherwise the return value is undefined.
*/
int32_t getLineNumber() const;
/**
* Return a string description of the position of the last line
* returned by readLine() or readLineSkippingComments().
* NOTE(review): the commented-out code below is Java syntax and has
* no C++ implementation in this class.
*/
//public String describePosition() {
// return reader.describePosition() + ':' + (lastpos+1);
//}
private:
// Scan one token out of 'line', starting at 'pos'.
UBool nextToken(UnicodeString& token, UErrorCode& ec);
TextFile* reader; // alias
UnicodeString line; // the line currently being tokenized
UBool done; // TRUE once the reader has failed to supply a line
UBool haveLine; // TRUE while 'line' may still hold unread tokens
int32_t pos; // current scan offset into 'line'; -1 before first use
int32_t lastpos; // value of 'pos' when the latest token scan began
};
#endif