ICU-2334 initial checkin to support test file driven tests

X-SVN-Rev: 14752
2025-04-21 12:40:02 +00:00 · 2004-03-25 02:19:11 +00:00 · 2004-03-25 02:19:11 +00:00 · 7b67501395
commit 7b67501395
parent 25e9c193f1
5 changed files with 440 additions and 0 deletions
--- a/icu4c/source/test/intltest/intltest.vcproj
+++ b/icu4c/source/test/intltest/intltest.vcproj
@ -646,6 +646,18 @@
 			<File
 				RelativePath=".\testutil.h">
 			</File>
+			<File
+				RelativePath=".\textfile.cpp">
+			</File>
+			<File
+				RelativePath=".\textfile.h">
+			</File>
+			<File
+				RelativePath=".\tokiter.cpp">
+			</File>
+			<File
+				RelativePath=".\tokiter.h">
+			</File>
 		</Filter>
 		<Filter
 			Name="normalization"
--- a/icu4c/source/test/intltest/textfile.cpp
+++ b/icu4c/source/test/intltest/textfile.cpp
@ -0,0 +1,177 @@
+/*
+**********************************************************************
+* Copyright (c) 2004, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: March 19 2004
+* Since: ICU 3.0
+**********************************************************************
+*/
+#include "textfile.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "intltest.h"
+#include "util.h"
+
+// If the symbol CCP is defined, then the 'name' and 'encoding'
+// constructor parameters are copied.  Otherwise they are aliased.
+// #define CCP
+
+/**
+ * Open the file '_name' for binary reading.  The path is built from the
+ * string returned by IntlTest::loadTestData(): everything after its last
+ * file separator is dropped, then ".." plus a separator and '_name' are
+ * appended.
+ *
+ * @param _name     file name relative to that directory; must not be 0
+ * @param _encoding name of the file's encoding, used later by readLine();
+ *                  must not be 0
+ * @param ec        input-output error code.  If it already indicates a
+ *                  failure nothing is done.  Set to
+ *                  U_ILLEGAL_ARGUMENT_ERROR for a null argument or when
+ *                  the file cannot be opened, and to
+ *                  U_MEMORY_ALLOCATION_ERROR when allocation fails.
+ */
+TextFile::TextFile(const char* _name, const char* _encoding, UErrorCode& ec) :
+    // Initializers are listed in member declaration order (see textfile.h).
+    name(0), encoding(0),
+    lineNo(0),
+    buffer(0),
+    capacity(0),
+    file(0)
+{
+    if (U_FAILURE(ec)) {
+        return;
+    }
+    if (_name == 0 || _encoding == 0) {
+        ec = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+#ifdef CCP
+    // Copy the strings.  uprv_malloc() returns void*, which C++ does not
+    // convert to char* implicitly, hence the casts.
+    name = static_cast<char*>(uprv_malloc(uprv_strlen(_name) + 1));
+    encoding = static_cast<char*>(uprv_malloc(uprv_strlen(_encoding) + 1));
+    if (name == 0 || encoding == 0) {
+        // Whichever allocation succeeded is released by the destructor.
+        ec = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    uprv_strcpy(name, _name);
+    uprv_strcpy(encoding, _encoding);
+#else
+    // Alias the caller's strings; they are never written through.
+    name = const_cast<char*>(_name);
+    encoding = const_cast<char*>(_encoding);
+#endif
+
+    const char* testDir = IntlTest::loadTestData(ec);
+    if (U_FAILURE(ec)) {
+        return;
+    }
+    // The extra 8 bytes leave room for the "../" piece and the trailing NUL.
+    if (!ensureCapacity(static_cast<int32_t>(
+            uprv_strlen(testDir) + uprv_strlen(name) + 8))) {
+        ec = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    uprv_strcpy(buffer, testDir);
+    // Cut the path off right after its last separator, if it has one.
+    char* lastSep = uprv_strrchr(buffer, (char)U_FILE_SEP_CHAR);
+    if (lastSep != 0) {
+        lastSep[1] = 0;
+    }
+    uprv_strcat(buffer, ".." U_FILE_SEP_STRING);
+    uprv_strcat(buffer, name);
+
+    file = T_FileStream_open(buffer, "rb");
+    if (file == 0) {
+        ec = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+}
+
+/**
+ * Free the line buffer and close the file if they were ever set up.
+ * When CCP is defined the copied 'name' and 'encoding' strings are
+ * freed as well.
+ */
+TextFile::~TextFile() {
+    if (buffer != 0) {
+        uprv_free(buffer);
+    }
+    if (file != 0) {
+        T_FileStream_close(file);
+    }
+#ifdef CCP
+    uprv_free(encoding);
+    uprv_free(name);
+#endif
+}
+
+/**
+ * Read bytes up to the next CR, LF, CR LF or end of file, then convert
+ * them from 'encoding' into 'line'.  The line terminator is consumed but
+ * not stored.
+ *
+ * @param line receives the converted text
+ * @param ec   input-output error code; set to U_MEMORY_ALLOCATION_ERROR
+ *             if the line buffer cannot be grown
+ * @return TRUE if a line was read; FALSE at end of file, if 'ec' already
+ *         indicates a failure, if the file was never opened, or if an
+ *         error occurs
+ */
+UBool TextFile::readLine(UnicodeString& line, UErrorCode& ec) {
+    // 'file' is 0 when the constructor failed; do not hand that to the
+    // file stream functions.
+    if (U_FAILURE(ec) || file == 0) {
+        return FALSE;
+    }
+    if (T_FileStream_eof(file)) {
+        return FALSE;
+    }
+    // Note: 'buffer' may change after ensureCapacity() is called,
+    // so don't use
+    //   p=buffer; *p++=c;
+    // but rather
+    //   i=; buffer[i++]=c;
+    int32_t n = 0;
+    for (;;) {
+        int c = T_FileStream_getc(file); // sic: int, not int32_t
+        if (c < 0 || c == 0xD || c == 0xA) {
+            // consume 0xA following 0xD
+            if (c == 0xD) {
+                c = T_FileStream_getc(file);
+                if (c != 0xA && c >= 0) {
+                    // Lone CR: the byte after it belongs to the next line.
+                    T_FileStream_ungetc(c, file);
+                }
+            }
+            break;
+        }
+        if (!setBuffer(n++, (char) c, ec)) return FALSE;
+    }
+    // NUL-terminate so the string constructor below knows where to stop.
+    if (!setBuffer(n++, 0, ec)) return FALSE;
+    line = UnicodeString(buffer, encoding);
+    ++lineNo;
+    return TRUE;
+}
+
+/**
+ * Keep reading lines until one is found that holds something other
+ * than white space and whose first non-white-space character is not '#'.
+ *
+ * @param line receives the line
+ * @param ec   input-output error code, passed through to readLine()
+ * @param trim if TRUE, the leading white space is removed from 'line'
+ * @return TRUE if such a line was found, FALSE once readLine() returns
+ *         FALSE
+ */
+UBool TextFile::readLineSkippingComments(UnicodeString& line, UErrorCode& ec,
+                                         UBool trim) {
+    while (readLine(line, ec)) {
+        // Locate the first character that is not white space.
+        int32_t start = 0;
+        ICU_Utility::skipWhitespace(line, start, TRUE);
+
+        const UBool blank = (start == line.length());
+        if (blank || line.charAt(start) == 0x23/*'#'*/) {
+            // Nothing of interest on this line; try the next one.
+            continue;
+        }
+
+        if (trim) {
+            line.remove(0, start);
+        }
+        return TRUE;
+    }
+    return FALSE;
+}
+
+/**
+ * Store c at buffer[index].  The buffer is enlarged first when 'index'
+ * lies at or beyond the current capacity.
+ *
+ * @param ec set to U_MEMORY_ALLOCATION_ERROR if the buffer cannot grow
+ * @return TRUE on success, FALSE if the buffer could not be enlarged
+ */
+UBool TextFile::setBuffer(int32_t index, char c, UErrorCode& ec) {
+    const UBool needsGrowth = (index >= capacity);
+    if (needsGrowth && !ensureCapacity(index+1)) {
+        ec = U_MEMORY_ALLOCATION_ERROR;
+        return FALSE;
+    }
+    buffer[index] = c;
+    return TRUE;
+}
+
+/**
+ * Make sure that 'buffer' has at least 'mincapacity' bytes.
+ * Return TRUE upon success. Upon return, 'buffer' may change
+ * value. In any case, the first 'capacity' bytes of the previous
+ * contents are preserved.
+ *
+ * @param mincapacity the number of bytes required
+ * @return TRUE if the buffer is now large enough, FALSE if allocation
+ *         failed (in which case 'buffer' and 'capacity' are unchanged)
+ */
+UBool TextFile::ensureCapacity(int32_t mincapacity) {
+    if (capacity >= mincapacity) {
+        return TRUE;
+    }
+
+    // Grow by factor of 2 to prevent frequent allocation, starting from
+    // the current capacity, or from 1024 when nothing is allocated yet.
+    // (In C++ 'capacity || 1024' is a boolean and would always yield 1.)
+    int32_t newcapacity = (capacity > 0) ? capacity : 1024;
+    while (newcapacity < mincapacity) {
+        if (newcapacity > 0x7FFFFFFF / 2) {
+            // Doubling would overflow; clamp to the largest int32_t.
+            newcapacity = 0x7FFFFFFF;
+            break;
+        }
+        newcapacity *= 2;
+    }
+
+    // Allocate a new block and copy the old bytes across.
+    // Note: 'buffer' may be 0
+    char* newbuffer = (char*) uprv_malloc(newcapacity);
+    if (newbuffer == 0) {
+        return FALSE;
+    }
+    if (buffer != 0) {
+        // Copy raw bytes: while a line is being read the buffer is not
+        // NUL-terminated and may hold NUL bytes, so a string copy could
+        // stop early.
+        uprv_memcpy(newbuffer, buffer, capacity);
+        uprv_free(buffer);
+    }
+    buffer = newbuffer;
+    capacity = newcapacity;
+    return TRUE;
+}
+
--- a/icu4c/source/test/intltest/textfile.h
+++ b/icu4c/source/test/intltest/textfile.h
@ -0,0 +1,73 @@
+/*
+**********************************************************************
+* Copyright (c) 2004, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: March 19 2004
+* Since: ICU 3.0
+**********************************************************************
+*/
+#ifndef __ICU_INTLTEST_TEXTFILE__
+#define __ICU_INTLTEST_TEXTFILE__
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "filestrm.h"
+
+/**
+ * This class implements access to a text data file located in the
+ * icu/source/test/testdata/ directory.
+ *
+ * A TextFile owns its open file and its line buffer, so it cannot be
+ * copied or assigned.
+ */
+class TextFile {
+ public:
+    /**
+     * Open a file with the given name, in the given encoding, in the
+     * ICU testdata directory. See textfile.cpp to determine if the
+     * 'name' and 'encoding' parameters are aliased or copied.
+     * @param ec input-output error code; set to a failure code if an
+     * argument is null, memory runs out, or the file cannot be opened
+     */
+    TextFile(const char* name, const char* encoding, UErrorCode& ec);
+
+    virtual ~TextFile();
+
+    /**
+     * Read a line terminated by ^J or ^M or ^M^J, and convert it from
+     * this file's encoding to Unicode. The EOL character(s) are not
+     * included in 'line'.
+     * @return TRUE if a line was read, or FALSE if the EOF
+     * was reached or an error occurred
+     */
+    UBool readLine(UnicodeString& line, UErrorCode& ec);
+
+    /**
+     * Read a line, ignoring blank lines and lines whose first
+     * non-white-space character is '#'.
+     * @param trim if TRUE then remove leading rule white space from
+     * the returned line; if FALSE the line is returned as read
+     * @return TRUE if a line was read, or FALSE if the EOF
+     * was reached or an error occurred
+     */
+    UBool readLineSkippingComments(UnicodeString& line, UErrorCode& ec,
+                                   UBool trim = FALSE);
+
+    /**
+     * Return the line number of the last line returned by readLine().
+     * The count starts at zero and is incremented for every line read,
+     * so the first line is line 1.
+     */
+    inline int32_t getLineNumber() const;
+
+ private:
+    // Declared but not implemented: a copy would close 'file' and free
+    // 'buffer' a second time when it is destroyed.
+    TextFile(const TextFile& other);
+    TextFile& operator=(const TextFile& other);
+
+    UBool ensureCapacity(int32_t capacity);
+    UBool setBuffer(int32_t index, char c, UErrorCode& ec);
+
+    char* name;       // file name; aliased or copied, see textfile.cpp
+    char* encoding;   // encoding name; aliased or copied, see textfile.cpp
+    int32_t lineNo;   // number of lines read so far
+    char* buffer;     // byte buffer, grown on demand; may be 0
+    int32_t capacity; // size of 'buffer' in bytes
+    FileStream* file; // open file, or 0 if the constructor failed
+};
+
+// Plain accessor for the running line counter.
+inline int32_t TextFile::getLineNumber() const {
+    return this->lineNo;
+}
+
+#endif
--- a/icu4c/source/test/intltest/tokiter.cpp
+++ b/icu4c/source/test/intltest/tokiter.cpp
@ -0,0 +1,107 @@
+/*
+**********************************************************************
+* Copyright (c) 2004, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: March 22 2004
+* Since: ICU 3.0
+**********************************************************************
+*/
+#include "tokiter.h"
+#include "textfile.h"
+#include "util.h"
+#include "uprops.h"
+
+// Start with no current line; the first call to next() reads one from
+// the given reader.
+TokenIterator::TokenIterator(TextFile* r) :
+    reader(r),
+    done(FALSE),
+    haveLine(FALSE),
+    pos(-1),
+    lastpos(-1)
+{
+}
+
+// Nothing to release: 'reader' is only an alias (see tokiter.h) and is
+// not closed or deleted here.
+TokenIterator::~TokenIterator() {
+}
+
+/**
+ * Fetch the next token, pulling further lines from the reader whenever
+ * the current one has no tokens left.
+ *
+ * @param token cleared first, then filled with the token
+ * @param ec    input-output error code
+ * @return TRUE if a token was stored in 'token'; FALSE if the reader has
+ *         no more lines, 'ec' indicates a failure, or a previous call
+ *         already ran out of lines
+ */
+UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
+    if (U_FAILURE(ec) || done) {
+        return FALSE;
+    }
+    token.truncate(0);
+    do {
+        if (!haveLine) {
+            // Current line is used up (or there never was one).
+            if (!reader->readLineSkippingComments(line, ec)) {
+                done = TRUE;
+                return FALSE;
+            }
+            haveLine = TRUE;
+            pos = 0;
+        }
+        // Remember where this scan started.
+        lastpos = pos;
+        if (nextToken(token, ec)) {
+            return TRUE;
+        }
+        // No token left on this line; go round again unless an error
+        // was reported.
+        haveLine = FALSE;
+    } while (U_SUCCESS(ec));
+    return FALSE;
+}
+
+// The line count is kept by the underlying TextFile; just forward to it.
+int32_t TokenIterator::getLineNumber() const {
+    const TextFile& source = *reader;
+    return source.getLineNumber();
+}
+
+/**
+ * Read the next token from 'this->line' and append it to 'token'.
+ * Tokens are separated by rule white space.  Tokens may also be
+ * delimited by double or single quotes.  The closing quote must match
+ * the opening quote.  If a '#' is encountered, the rest of the line
+ * is ignored, unless it is backslash-escaped or within quotes.
+ * Backslash escapes are decoded with UnicodeString::unescapeAt().
+ * @param token the token is appended to this UnicodeString
+ * @param ec input-output error code; set to
+ * U_MALFORMED_UNICODE_ESCAPE for a bad escape sequence and to
+ * U_UNTERMINATED_QUOTE if a quoted token is not closed on this line
+ * @return TRUE if a valid token is found, or FALSE if the end
+ * of the line is reached or an error occurs
+ */
+UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
+    ICU_Utility::skipWhitespace(line, pos, TRUE);
+    if (pos == line.length()) {
+        return FALSE;
+    }
+    // Characters are compared by code point value rather than by char
+    // literal, so the result does not depend on the compiler's charset.
+    UChar c = line.charAt(pos++);
+    UChar quote = 0;
+    switch (c) {
+    case 34/*'"'*/:
+    case 39/*'\''*/:
+        // Opening quote: remember which one so the loop can match it.
+        quote = c;
+        break;
+    case 35/*'#'*/:
+        // Comment: nothing more to read on this line.
+        return FALSE;
+    default:
+        if (c == 92/*'\\'*/) {
+            // Step back so that the loop below decodes the escape.
+            --pos;
+        } else {
+            token.append(c);
+        }
+        break;
+    }
+    while (pos < line.length()) {
+        c = line.charAt(pos); // 16-bit ok
+        if (c == 92/*'\\'*/) {
+            // unescapeAt() expects the offset just past the backslash
+            // and advances it beyond the escape sequence.
+            ++pos;
+            UChar32 c32 = line.unescapeAt(pos);
+            if (c32 < 0) {
+                ec = U_MALFORMED_UNICODE_ESCAPE;
+                return FALSE;
+            }
+            token.append(c32);
+        } else if ((quote != 0 && c == quote) ||
+                   (quote == 0 && uprv_isRuleWhiteSpace(c))) {
+            // End of token: consume the closing quote or the white space.
+            ++pos;
+            return TRUE;
+        } else if (quote == 0 && c == 35/*'#'*/) {
+            // Leave 'pos' on the '#'; the next call sees it first and
+            // reports the end of the line.
+            return TRUE; // do NOT increment
+        } else {
+            token.append(c);
+            ++pos;
+        }
+    }
+    if (quote != 0) {
+        // Ran off the end of the line inside a quoted token.
+        ec = U_UNTERMINATED_QUOTE;
+        return FALSE;
+    }
+    return TRUE;
+}
--- a/icu4c/source/test/intltest/tokiter.h
+++ b/icu4c/source/test/intltest/tokiter.h
@ -0,0 +1,71 @@
+/*
+**********************************************************************
+* Copyright (c) 2004, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: March 22 2004
+* Since: ICU 3.0
+**********************************************************************
+*/
+#ifndef __ICU_INTLTEST_TOKITER__
+#define __ICU_INTLTEST_TOKITER__
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+class TextFile;
+
+/**
+ * An iterator class that returns successive string tokens from some
+ * source.  String tokens are, in general, separated by rule white
+ * space in the source text.  Furthermore, they may be delimited by
+ * either single or double quotes (opening and closing quotes must
+ * match).  Escapes are processed using standard ICU unescaping.
+ */
+class TokenIterator {
+ public:
+
+    /**
+     * Construct an iterator over the tokens returned by the given
+     * TextFile, ignoring blank lines and comment lines (first
+     * non-blank character is '#').  Note that trailing comments on a
+     * line, beginning with the first unquoted '#', are recognized.
+     *
+     * The TextFile is aliased, not adopted: the iterator never deletes
+     * it, and it must stay valid for as long as the iterator is used.
+     * The constructor is explicit so that a TextFile* is never
+     * converted to a TokenIterator by accident.
+     */
+    explicit TokenIterator(TextFile* r);
+
+    virtual ~TokenIterator();
+
+    /**
+     * Return the next token from this iterator.
+     * @param token receives the token; its previous contents are discarded
+     * @param ec input-output error code
+     * @return TRUE if a token was read, or FALSE if no more tokens
+     * are available or an error occurred.
+     */
+    UBool next(UnicodeString& token, UErrorCode& ec);
+
+    /**
+     * Return the one-based line number of the line of the last token
+     * returned by next(). Should only be called after a call to
+     * next(); otherwise the return value is undefined.
+     */
+    int32_t getLineNumber() const;
+    
+    /**
+     * Return a string description of the position of the last line
+     * returned by readLine() or readLineSkippingComments().
+     * (Not available in C++; the Java-style sketch is kept below for
+     * reference.)
+     */
+    //public String describePosition() {
+    //    return reader.describePosition() + ':' + (lastpos+1);
+    //}
+    
+ private:
+    UBool nextToken(UnicodeString& token, UErrorCode& ec);
+
+    TextFile* reader; // alias
+    UnicodeString line;   // the line currently being tokenized
+    UBool done;           // TRUE once the reader has run out of lines
+    UBool haveLine;       // TRUE while 'line' may still hold tokens
+    int32_t pos;          // index in 'line' where the next scan starts
+    int32_t lastpos;      // index in 'line' where the last scan started
+};
+
+#endif