ICU-8108 Allowing imports from the same file by suppressing building binaries for imported rules

X-SVN-Rev: 31036
2025-04-17 02:37:25 +00:00 · 2011-12-06 00:23:46 +00:00 · 2011-12-06 00:23:46 +00:00 · 4a6f817e00
commit 4a6f817e00
parent 670f0268a1
5 changed files with 180 additions and 42 deletions
--- a/icu4c/source/test/cintltst/cmsccoll.c
+++ b/icu4c/source/test/cintltst/cmsccoll.c
@ -6593,6 +6593,132 @@ static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
  return (*a < *b ? -1 : 1);
 }

+static void TestImportRulesDeWithPhonebook(void)
+{ /* Runs the same two comparisons against tailorings without and with "[import de-u-co-phonebk]". */
+  const char* normalRules[] = {                  /* Local tailorings only; nothing is imported. */
+    "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
+    "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
+    "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
+  };
+  const OneTestCase normalTests[] = {
+    { {0x00e6}, {0x00c6}, UCOL_LESS},            /* U+00E6 < U+00C6, the order the rules list them in */
+    { {0x00fc}, {0x00dc}, UCOL_GREATER},         /* U+00FC > U+00DC, the order the rules list them in */
+  };
+  /* The same three tailorings, each followed by an import of the de "phonebk" collation type. */
+  const char* importRules[] = {
+    "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
+    "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
+    "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
+  };
+  const OneTestCase importTests[] = {
+    { {0x00e6}, {0x00c6}, UCOL_LESS},            /* unchanged from normalTests */
+    { {0x00fc}, {0x00dc}, UCOL_LESS},            /* U+00FC < U+00DC: flipped relative to normalTests */
+  };
+  /* doTestOneTestCase is defined elsewhere; presumably it checks every case against every rule string. */
+  doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules));
+  doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules));
+}
+
+static void TestImportRulesFiWithEor(void)
+{ /* Compares expectations for a dummy rule, root "eor", fi "standard", and both imports combined. */
+  /* DUCET. */
+  const char* defaultRules[] = {
+    "&a<b",                                    /* Dummy rule. */
+  };
+
+  const OneTestCase defaultTests[] = {
+    { {0x0110}, {0x00F0}, UCOL_LESS},          /* "Đ" < "ð" */
+    { {0x00a3}, {0x00a5}, UCOL_LESS},          /* "£" < "¥" */
+    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},  /* "a" < "a£" */
+  };
+
+  /* European Ordering rules: ignore currency characters. */
+  const char* eorRules[] = {
+    "[import root-u-co-eor]",
+  };
+
+  const OneTestCase eorTests[] = {
+    { {0x0110}, {0x00F0}, UCOL_LESS},           /* "Đ" < "ð" */
+    { {0x00a3}, {0x00a5}, UCOL_EQUAL},          /* "£" = "¥" */
+    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},  /* "a" = "a£" */
+  };
+
+  /* Fi standard rules:  "Đ" >  "ð". */
+  const char* fiStdRules[] = {
+    "[import fi-u-co-standard]",
+  };
+
+  const OneTestCase fiStdTests[] = {
+    { {0x0110}, {0x00F0}, UCOL_GREATER},         /* "Đ" > "ð" */
+    { {0x00a3}, {0x00a5}, UCOL_LESS},            /* "£" < "¥" */
+    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},    /* "a" < "a£" */
+  };
+
+  /* Both European Ordering Rules and Fi Standard Rules. */
+  const char* eorFiStdRules[] = {
+    "[import root-u-co-eor][import fi-u-co-standard]",
+  };
+
+  /* Essentially the same as eorFiStdRules once fi.txt is updated with the import; only used by the disabled call below. */
+  const char* fiEorRules[] = {
+    "[import fi-u-co-eor]",
+  };
+
+  const OneTestCase fiEorTests[] = {
+    { {0x0110}, {0x00F0}, UCOL_GREATER},         /* "Đ" > "ð" */
+    { {0x00a3}, {0x00a5}, UCOL_EQUAL},           /* "£" = "¥" */
+    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},   /* "a" = "a£" */
+  };
+  /* Each rule list runs against its own expectations; the combined eor+fi import is checked against fiEorTests. */
+  doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
+  doTestOneTestCase(eorTests, LEN(eorTests), eorRules, LEN(eorRules));
+  doTestOneTestCase(fiStdTests, LEN(fiStdTests), fiStdRules, LEN(fiStdRules));
+  doTestOneTestCase(fiEorTests, LEN(fiEorTests), eorFiStdRules, LEN(eorFiStdRules));
+
+  /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
+        eor{
+            Sequence{
+                "[import root-u-co-eor][import fi-u-co-standard]"
+            }
+            Version{"21.0"}
+        }
+  */
+  /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */
+
+}
+
+#if 0
+/*
+ * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
+ * the resource files are built with -includeUnihanColl option.
+ * TODO: Enable this function (remove the #if 0) and make it work when unihan rules are built by default.
+ */
+static void TestImportRulesCJKWithUnihan(void)
+{
+  /* DUCET. */
+  const char* defaultRules[] = {
+    "&a<b",                                    /* Dummy rule. */
+  };
+
+  const OneTestCase defaultTests[] = {
+    { {0x3402}, {0x4e1e}, UCOL_GREATER},          /* U+3402 > U+4E1E */
+  };
+
+  /* Rules imported from the ko "unihan" collation type. */
+  const char* unihanRules[] = {
+    "[import ko-u-co-unihan]",
+  };
+
+  const OneTestCase unihanTests[] = {
+    { {0x3402}, {0x4e1e}, UCOL_LESS},          /* U+3402 < U+4E1E */
+  };
+
+  doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
+  doTestOneTestCase(unihanTests, LEN(unihanTests), unihanRules, LEN(unihanRules));
+
+}
+#endif
+
 static void TestImport(void)
 {
    UCollator* vicoll;
@ -6980,6 +7106,9 @@ void addMiscCollTest(TestNode** root)
    TEST(TestPrivateUseCharactersInList);
    TEST(TestPrivateUseCharactersInRange);
    TEST(TestInvalidListsAndRanges);
+    TEST(TestImportRulesDeWithPhonebook);
+    TEST(TestImportRulesFiWithEor);
+    /* TEST(TestImportRulesCJKWithUnihan); */
    TEST(TestImport);
    TEST(TestImportWithType);

--- a/icu4c/source/tools/genrb/genrb.c
+++ b/icu4c/source/tools/genrb/genrb.c
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1998-2010, International Business Machines
+*   Copyright (C) 1998-2011, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -22,7 +22,8 @@
 #include "ucmndata.h"  /* TODO: for reading the pool bundle */

 /* Protos */
-void  processFile(const char *filename, const char* cp, const char *inputDir, const char *outputDir, const char *packageName, UErrorCode *status);
+void  processFile(const char *filename, const char* cp, const char *inputDir, const char *outputDir,
+    const char *packageName, UBool omitBinaryCollation, UErrorCode *status);
 static char *make_res_filename(const char *filename, const char *outputDir,
                               const char *packageName, UErrorCode *status);

@ -285,7 +286,7 @@ main(int argc,
        }
    }

-    initParser(options[NO_BINARY_COLLATION].doesOccur, options[NO_COLLATION_RULES].doesOccur);
+    initParser(options[NO_COLLATION_RULES].doesOccur);

    /*added by Jing*/
    if(options[LANGUAGE].doesOccur) {
@ -424,7 +425,9 @@ main(int argc,
        if (isVerbose()) {
            printf("Processing file \"%s\"\n", theCurrentFileName);
        }
-        processFile(arg, encoding, inputDir, outputDir, gPackageName, &status);
+        processFile(arg, encoding, inputDir, outputDir, gPackageName,
+                    options[NO_BINARY_COLLATION].doesOccur,
+                    &status);
    }

    uprv_free(poolBundle.fBytes);
@ -448,7 +451,9 @@ main(int argc,

 /* Process a file */
 void
-processFile(const char *filename, const char *cp, const char *inputDir, const char *outputDir, const char *packageName, UErrorCode *status) {
+processFile(
+    const char *filename, const char *cp, const char *inputDir, const char *outputDir, const char *packageName,
+    UBool omitBinaryCollation, UErrorCode *status) {
    /*FileStream     *in           = NULL;*/
    struct SRBRoot *data         = NULL;
    UCHARBUF       *ucbuf        = NULL;
@ -471,6 +476,7 @@ processFile(const char *filename, const char *cp, const char *inputDir, const ch
    }else{
        filelen = (int32_t)uprv_strlen(filename);
    }
+
    if(inputDir == NULL) {
        const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
        openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
@ -555,7 +561,7 @@ processFile(const char *filename, const char *cp, const char *inputDir, const ch
        printf("autodetected encoding %s\n", cp);
    }
    /* Parse the data into an SRBRoot */
-    data = parse(ucbuf, inputDir, outputDir, status);
+    data = parse(ucbuf, inputDir, outputDir, !omitBinaryCollation, status);

    if (data == NULL || U_FAILURE(*status)) {
        fprintf(stderr, "couldn't parse the file %s. Error:%s\n", filename,u_errorName(*status));
@ -637,6 +643,7 @@ make_res_filename(const char *filename,

    int32_t pkgLen = 0; /* length of package prefix */

+
    if (U_FAILURE(*status)) {
        return 0;
    }
--- a/icu4c/source/tools/genrb/genrb.h
+++ b/icu4c/source/tools/genrb/genrb.h
@ -49,6 +49,7 @@ U_CAPI void processFile(
    const char *inputDir,
    const char *outputDir,
    const char *packageName,
+    UBool omitBinaryCollation,
    UErrorCode *status);

 U_CDECL_END
--- a/icu4c/source/tools/genrb/parse.cpp
+++ b/icu4c/source/tools/genrb/parse.cpp
@ -84,9 +84,9 @@ typedef struct {
    uint32_t        inputdirLength;
    const char     *outputdir;
    uint32_t        outputdirLength;
+    UBool           makeBinaryCollation;
 } ParseState;

-static UBool gMakeBinaryCollation = TRUE;
 static UBool gOmitCollationRules  = FALSE;

 typedef struct SResource *
@ -772,18 +772,23 @@ static const UChar* importFromDataFile(void* context, const char* locale, const
    }

    /* Parse the data into an SRBRoot */
-    data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, status);
+    data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, FALSE, status);

    root = data->fRoot;
    collations = resLookup(root, "collations");
-    collation = resLookup(collations, type);
-    sequence = resLookup(collation, "Sequence");
-    urules = sequence->u.fString.fChars;
-    urulesLength = sequence->u.fString.fLength;
-    *pLength = urulesLength;
+    if (collations != NULL) {
+      collation = resLookup(collations, type);
+      if (collation != NULL) {
+        sequence = resLookup(collation, "Sequence");
+        if (sequence != NULL) {
+          urules = sequence->u.fString.fChars;
+          urulesLength = sequence->u.fString.fLength;
+          *pLength = urulesLength;
+        }
+      }
+    }

 finish:
-
    if (inputDirBuf != NULL) {
        uprv_free(inputDirBuf);
    }
@ -893,8 +898,7 @@ addCollation(ParseState* state, struct SResource  *result, uint32_t startline, U
 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
            warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
 #else
-            if(gMakeBinaryCollation) {
-                UErrorCode intStatus = U_ZERO_ERROR;
+            if(state->makeBinaryCollation) {

                /* do the collation elements */
                int32_t     len   = 0;
@ -908,6 +912,7 @@ addCollation(ParseState* state, struct SResource  *result, uint32_t startline, U
                genrbdata.inputDir = state->inputdir;
                genrbdata.outputDir = state->outputdir;

+                UErrorCode intStatus = U_ZERO_ERROR;
                coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength,
                                               UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus);

@ -958,9 +963,9 @@ addCollation(ParseState* state, struct SResource  *result, uint32_t startline, U
                else
                {
                    if(intStatus == U_FILE_ACCESS_ERROR) {
-                      error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
-                      *status = intStatus;
-                      return NULL;
+                        error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
+                        *status = intStatus;
+                        return NULL;
                    }
                    warning(line, "%%Collation could not be constructed from CollationElements - check context!");
                    if(isStrict()){
@ -982,11 +987,6 @@ addCollation(ParseState* state, struct SResource  *result, uint32_t startline, U
                table_add(result, member, line, status);
            }
        }
-
-        /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
-
-        /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
-
        if (U_FAILURE(*status))
        {
            res_close(result);
@ -994,10 +994,9 @@ addCollation(ParseState* state, struct SResource  *result, uint32_t startline, U
        }
    }

-    /* not reached */
-    /* A compiler warning will appear if all paths don't contain a return statement. */
-/*    *status = U_INTERNAL_PROGRAM_ERROR;
-    return NULL;*/
+    // Reached the end without a TOK_CLOSE_BRACE.  Should be an error.
+    *status = U_INTERNAL_PROGRAM_ERROR;
+    return NULL;
 }

 static struct SResource *
@ -1090,7 +1089,6 @@ parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n
                    u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
                    if(uprv_strcmp(typeKeyword, "alias") == 0) {
                        member = parseResource(state, subtag, NULL, status);
-
                        if (U_FAILURE(*status))
                        {
                            res_close(result);
@ -1137,6 +1135,7 @@ realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t s
    UBool             readToken = FALSE;

    /* '{' . (name resource)* '}' */
+
    if(isVerbose()){
        printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
    }
@ -1200,7 +1199,7 @@ realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t s
        }
        readToken = TRUE;
        ustr_deinit(&comment);
-    }
+   }

    /* not reached */
    /* A compiler warning will appear if all paths don't contain a return statement. */
@ -1231,7 +1230,6 @@ parseTable(ParseState* state, char *tag, uint32_t startline, const struct UStrin
    {
        return NULL;
    }
-
    return realParseTable(state, result, tag, startline,  status);
 }

@ -1815,7 +1813,7 @@ static struct {
    {"reserved", NULL, NULL}
 };

-void initParser(UBool omitBinaryCollation, UBool omitCollationRules)
+void initParser(UBool omitCollationRules)
 {
    U_STRING_INIT(k_type_string,    "string",    6);
    U_STRING_INIT(k_type_binary,    "binary",    6);
@ -1836,7 +1834,6 @@ void initParser(UBool omitBinaryCollation, UBool omitCollationRules)
    U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)",   23);
    U_STRING_INIT(k_type_plugin_dependency,     "process(dependency)",       19);

-    gMakeBinaryCollation = !omitBinaryCollation;
    gOmitCollationRules = omitCollationRules;
 }

@ -1897,6 +1894,7 @@ parseResource(ParseState* state, char *tag, const struct UString *comment, UErro
    uint32_t                 startline;
    uint32_t                 line;

+
    token = getToken(state, &tokenValue, NULL, &startline, status);

    if(isVerbose()){
@ -1938,6 +1936,7 @@ parseResource(ParseState* state, char *tag, const struct UString *comment, UErro
        return NULL;
    }

+
    if (resType == RT_UNKNOWN)
    {
        /* No explicit type, so try to work it out.  At this point, we've read the first '{'.
@ -1998,6 +1997,7 @@ parseResource(ParseState* state, char *tag, const struct UString *comment, UErro
        return NULL;
    }

+
    /* We should now know what we need to parse next, so call the appropriate parser
    function and return. */
    parseFunction = gResourceTypes[resType].parseFunction;
@ -2014,7 +2014,8 @@ parseResource(ParseState* state, char *tag, const struct UString *comment, UErro

 /* parse the top-level resource */
 struct SRBRoot *
-parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *status)
+parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UBool makeBinaryCollation,
+      UErrorCode *status)
 {
    struct UString    *tokenValue;
    struct UString    comment;
@ -2024,6 +2025,7 @@ parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *st
    ParseState state;
    uint32_t i;

+
    for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
    {
        ustr_init(&state.lookahead[i].value);
@ -2036,6 +2038,7 @@ parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *st
    state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
    state.outputdir       = outputDir;
    state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
+    state.makeBinaryCollation = makeBinaryCollation;

    ustr_init(&comment);
    expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
@ -2063,9 +2066,7 @@ parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *st
        else
        {
            *status=U_PARSE_ERROR;
-            /* printf("asdsdweqdasdad\n"); */
-
-            error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
+             error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
        }
    }
    else
@ -2101,12 +2102,11 @@ parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *st
    }
    /* top-level tables need not handle special table names like "collations" */
    realParseTable(&state, state.bundle->fRoot, NULL, line, status);
-
    if(dependencyArray!=NULL){
        table_add(state.bundle->fRoot, dependencyArray, 0, status);
        dependencyArray = NULL;
    }
-    if (U_FAILURE(*status))
+   if (U_FAILURE(*status))
    {
        bundle_close(state.bundle, status);
        res_close(dependencyArray);
--- a/icu4c/source/tools/genrb/parse.h
+++ b/icu4c/source/tools/genrb/parse.h
@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1998-2006, International Business Machines
+*   Copyright (C) 1998-2011, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@ -24,10 +24,11 @@

 U_CDECL_BEGIN
 /* One time parser initalisation */
-void initParser(UBool omitBinaryCollation, UBool omitCollationRules);
+void initParser(UBool omitCollationRules);

 /* Parse a ResourceBundle text file */
-struct SRBRoot* parse(UCHARBUF *buf, const char* inputDir, const char* outputDir, UErrorCode *status);
+struct SRBRoot* parse(UCHARBUF *buf, const char* inputDir, const char* outputDir,
+                      UBool makeBinaryCollation, UErrorCode *status); /* makeBinaryCollation: TRUE builds binary collation data, FALSE skips it (same name and sense as the definition in parse.cpp) */

 U_CDECL_END