ICU-7501 Use allowsParsing attribute from CLDR to detect unparseable RBNF rulesets

X-SVN-Rev: 31416
2025-04-07 22:44:49 +00:00 · 2012-02-17 23:01:16 +00:00 · 2012-02-17 23:01:16 +00:00 · bae575e95a
commit bae575e95a
parent dbecc6efd4
8 changed files with 32 additions and 65 deletions
--- a/icu4c/source/data/rbnf/ar.txt
+++ b/icu4c/source/data/rbnf/ar.txt
@ -61,13 +61,13 @@ ar{
            "1000000000000000: \u0628\u0644\u064A\u0627\u0631[ >%spellout-numbering>];",
            "2000000000000000: <%%spellout-numbering-m< \u0628\u0644\u064A\u0627\u0631[ >%spellout-numbering>];",
            "1000000000000000000: =#,##0=;",
-            "%spellout-cardinal-feminine-prefixpart:",
+            "%spellout-cardinal-feminine-prefixpart@noparse:",
            "-x: \u0646\u0627\u0642\u0635 >>;",
            "x.x: <%spellout-numbering< \u0641\u0627\u0635\u0644\u0629 >%%zz-fraction-feminine> ;",
            "0: \u0635\u0641\u0631 ;",
            "1: ;",
            "3: =%%spellout-cardinal-feminine-prefx= ;",
-            "%spellout-cardinal-feminine-postfixpart:",
+            "%spellout-cardinal-feminine-postfixpart@noparse:",
            "-x: >>;",
            "x.x: ;",
            "0: ;",
@ -155,13 +155,13 @@ ar{
            "1000000000000000: \u0628\u0644\u064A\u0627\u0631[ >%%spellout-numbering-m>];",
            "2000000000000000: <%%spellout-numbering-m< \u0628\u0644\u064A\u0627\u0631[ >%%spellout-numbering-m>];",
            "1000000000000000000: =#,##0=;",
-            "%spellout-cardinal-masculine-prefixpart:",
+            "%spellout-cardinal-masculine-prefixpart@noparse:",
            "-x: \u0646\u0627\u0642\u0635 >>;",
            "x.x: <%%spellout-numbering-m< \u0641\u0627\u0635\u0644\u0629 >%%zz-fraction-masculine> ;",
            "0: \u0635\u0641\u0631 ;",
            "1: ;",
            "3: =%%spellout-cardinal-masculine-prefx= ;",
-            "%spellout-cardinal-masculine-postfixpart:",
+            "%spellout-cardinal-masculine-postfixpart@noparse:",
            "-x: >>;",
            "x.x: ;",
            "0: ;",
--- a/icu4c/source/data/rbnf/ga.txt
+++ b/icu4c/source/data/rbnf/ga.txt
@ -64,7 +64,7 @@ ga{
            "10: =%%spellout-numbering-no-a=;",
            "%%lenient-parse:",
            "& ' ' , ',' ;",
-            "%spellout-numbering-year:",
+            "%spellout-numbering-year@noparse:",
            "-x: m\u00EDneas >>;",
            "x.x: =#,##0.#=;",
            "0: =%spellout-numbering=;",
@ -86,7 +86,7 @@ ga{
            "12: >>=%spellout-cardinal-postfixparth=;",
            "13: >>=%spellout-cardinal-postfixpart=;",
            "20: =%spellout-numbering=;",
-            "%spellout-numbering:",
+            "%spellout-numbering@noparse:",
            "-x: m\u00EDneas >>;",
            "x.x: << pointe >>;",
            "0: a n\u00E1id;",
@ -126,7 +126,7 @@ ga{
            "0: =%spellout-cardinal-prefixpart=;",
            "12: d\u00F3=%spellout-cardinal-postfixparth=;",
            "13: =%spellout-cardinal-prefixpart==%spellout-cardinal-postfixpart=;",
-            "%spellout-cardinal-prefixpart:",
+            "%spellout-cardinal-prefixpart@noparse:",
            "-x: m\u00EDneas >>;",
            "x.x: <%%numberp< pointe >>;",
            "0: n\u00E1id;",
@ -156,7 +156,7 @@ ga{
            "1000000000000: <%%trillions<[, >%%numberp>];",
            "1000000000000000: <%%quadrillions<[, >%%numberp>];",
            "1000000000000000000: =#,##0=;",
-            "%spellout-cardinal-postfixpart:",
+            "%spellout-cardinal-postfixpart@noparse:",
            "-x: >>;",
            "x.x: ;",
            "0: ;",
@ -164,7 +164,7 @@ ga{
            "2: ;",
            "11: ' d\u00E9ag;",
            "20: ;",
-            "%spellout-cardinal-postfixparth:",
+            "%spellout-cardinal-postfixparth@noparse:",
            "-x: >>;",
            "x.x: ;",
            "0: ;",
--- a/icu4c/source/data/rbnf/he.txt
+++ b/icu4c/source/data/rbnf/he.txt
@ -60,13 +60,13 @@ he{
            "2000000000000000: \u05E9\u05E0\u05D9 \u05D8\u05E8\u05D9\u05DC\u05D9\u05E8\u05D3[ >%%and-feminine>];",
            "3000000000000000: <%%spellout-numbering-m< \u05D8\u05E8\u05D9\u05DC\u05D9\u05E8\u05D3[ >%%and-feminine>];",
            "1000000000000000000: =#,##0=;",
-            "%spellout-cardinal-feminine-prefixpart:",
+            "%spellout-cardinal-feminine-prefixpart@noparse:",
            "-x: \u05DE\u05D9\u05E0\u05D5\u05E1 >>;",
            "x.x: <%spellout-numbering< \u05E0\u05E7\u05D5\u05D3\u05D4 >%%zz-fraction-feminine> ;",
            "0: \u05D0\u05E4\u05E1 ;",
            "1: ;",
            "2: =%%spellout-cardinal-feminine-prefx= ;",
-            "%spellout-cardinal-feminine-postfixpart:",
+            "%spellout-cardinal-feminine-postfixpart@noparse:",
            "-x: >>;",
            "x.x: ;",
            "0: ;",
@ -211,13 +211,13 @@ he{
            "2000000000000000: \u05E9\u05E0\u05D9 \u05D8\u05E8\u05D9\u05DC\u05D9\u05E8\u05D3[ >%%and-masculine>];",
            "3000000000000000: <%%spellout-numbering-m< \u05D8\u05E8\u05D9\u05DC\u05D9\u05E8\u05D3[ >%%and-masculine>];",
            "1000000000000000000: =#,##0=;",
-            "%spellout-cardinal-masculine-prefixpart:",
+            "%spellout-cardinal-masculine-prefixpart@noparse:",
            "-x: \u05DE\u05D9\u05E0\u05D5\u05E1 >>;",
            "x.x: <%%spellout-numbering-m< \u05E0\u05E7\u05D5\u05D3\u05D4 >%%zz-fraction-masculine> ;",
            "0: \u05D0\u05E4\u05E1 ;",
            "1: ;",
            "2: =%%spellout-cardinal-masculine-prefx= ;",
-            "%spellout-cardinal-masculine-postfixpart:",
+            "%spellout-cardinal-masculine-postfixpart@noparse:",
            "-x: >>;",
            "x.x: ;",
            "0: ;",
--- a/icu4c/source/data/rbnf/mt.txt
+++ b/icu4c/source/data/rbnf/mt.txt
@ -200,7 +200,7 @@ mt{
            "2000000000000000: <%%spellout-cardinal-masculine< kvadriljuni[>%%and-typeA-masculine>];",
            "11000000000000000/1,000: <%%spellout-cardinal-masculine< kvadriljun[>%%and-typeA-masculine>];",
            "1000000000000000000: =#,##0=;",
-            "%spellout-cardinal-masculine-prefixpart:",
+            "%spellout-cardinal-masculine-prefixpart@noparse:",
            "-x: minus >>;",
            "x.x: <%%spellout-cardinal-masculine< punt >%%zz-fraction-masculine> ;",
            "0: \u017Cero ;",
@ -208,7 +208,7 @@ mt{
            "2: =%%spellout-cardinal-masculine-prefx= ;",
            "%%lenient-parse:",
            "&[last primary ignorable ] << ' ' << ',' << '-' << '\u00AD';",
-            "%spellout-cardinal-masculine-postfixpart:",
+            "%spellout-cardinal-masculine-postfixpart@noparse:",
            "-x: >>;",
            "x.x: ;",
            "0: ;",
@ -321,13 +321,13 @@ mt{
            "2000000000000000: <%%spellout-cardinal-masculine< kvadriljuni[>%%and-typeA-feminine>];",
            "11000000000000000/1,000: <%%spellout-cardinal-masculine< kvadriljun[>%%and-typeA-feminine>];",
            "1000000000000000000: =#,##0=;",
-            "%spellout-cardinal-feminine-prefixpart:",
+            "%spellout-cardinal-feminine-prefixpart@noparse:",
            "-x: minus >>;",
            "x.x: <%%spellout-cardinal-feminine< punt >%%zz-fraction-feminine> ;",
            "0: \u017Cero ;",
            "1: ;",
            "2: =%%spellout-cardinal-feminine-prefx= ;",
-            "%spellout-cardinal-feminine-postfixpart:",
+            "%spellout-cardinal-feminine-postfixpart@noparse:",
            "-x: >>;",
            "x.x: ;",
            "0: ;",
--- a/icu4c/source/i18n/nfrs.cpp
+++ b/icu4c/source/i18n/nfrs.cpp
@ -1,6 +1,6 @@
 /*
 ******************************************************************************
-*   Copyright (C) 1997-2011, International Business Machines
+*   Copyright (C) 1997-2012, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 ******************************************************************************
 *   file name:  nfrs.cpp
@ -113,12 +113,18 @@ static const UChar gPercentPercent[] =
    0x25, 0x25, 0
 }; /* "%%" */

+static const UChar gNoparse[] =
+{
+    0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
+}; /* "@noparse" - 8 UChars plus terminator; a ruleset name ending in this suffix is marked non-parseable */
+
 NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status)
  : name()
  , rules(0)
  , negativeNumberRule(NULL)
  , fIsFractionRuleSet(FALSE)
  , fIsPublic(FALSE)
+  , fIsParseable(TRUE)
  , fRecursionCount(0)
 {
    for (int i = 0; i < 3; ++i) {
@ -163,6 +169,11 @@ NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& sta

    fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;

+    if ( name.endsWith(gNoparse,8) ) {
+        fIsParseable = FALSE;
+        name.truncate(name.length()-8); // remove the @noparse from the name
+    }
+
    // all of the other members of NFRuleSet are initialized
    // by parseRules()
 }
--- a/icu4c/source/i18n/nfrs.h
+++ b/icu4c/source/i18n/nfrs.h
@ -1,6 +1,6 @@
 /*
 ******************************************************************************
-*   Copyright (C) 1997-2009, International Business Machines
+*   Copyright (C) 1997-2012, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 ******************************************************************************
 *   file name:  nfrs.h
@ -41,13 +41,7 @@ class NFRuleSet : public UMemory {

  UBool isPublic() const { return fIsPublic; }

-  UBool isParseable() const { 
-      UnicodeString prefixpart = UNICODE_STRING_SIMPLE("-prefixpart");
-      UnicodeString postfix = UNICODE_STRING_SIMPLE("-postfix");
-      UnicodeString postfx = UNICODE_STRING_SIMPLE("-postfx");
-
-      return ( name.indexOf(prefixpart) == -1 && name.indexOf(postfix) == -1 && name.indexOf(postfx) == -1 );
-  }
+  UBool isParseable() const { return fIsParseable; } // TRUE unless the ruleset name carried the "@noparse" suffix

  UBool isFractionRuleSet() const { return fIsFractionRuleSet; }

@ -73,6 +67,7 @@ class NFRuleSet : public UMemory {
  NFRule *fractionRules[3];
  UBool fIsFractionRuleSet;
  UBool fIsPublic;
+  UBool fIsParseable;
  int32_t fRecursionCount;

  NFRuleSet(const NFRuleSet &other); // forbid copying of this class
--- a/icu4c/source/i18n/rbnf.cpp
+++ b/icu4c/source/i18n/rbnf.cpp
@ -58,10 +58,6 @@ static const UChar gSemiPercent[] =
 #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
 #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)

-// Temporary workaround - when noParse is true, do noting in parse.
-// TODO: We need a real fix - see #6895/#6896
-static const char *NO_SPELLOUT_PARSE_LANGUAGES[] = { "ga", NULL };
-
 U_NAMESPACE_BEGIN

 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
@ -660,7 +656,6 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
-  , noParse(FALSE) //TODO: to be removed after #6895
 {
  LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
  init(description, locinfo, perror, status);
@ -677,7 +672,6 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
-  , noParse(FALSE) //TODO: to be removed after #6895
 {
  LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
  init(description, locinfo, perror, status);
@ -694,7 +688,6 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
-  , noParse(FALSE) //TODO: to be removed after #6895
 {
  init(description, info, perror, status);
 }
@ -710,7 +703,6 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
-  , noParse(FALSE) //TODO: to be removed after #6895
 {
    init(description, NULL, perror, status);
 }
@ -727,7 +719,6 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
-  , noParse(FALSE) //TODO: to be removed after #6895
 {
    init(description, NULL, perror, status);
 }
@ -783,19 +774,6 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale&

        init (desc, locinfo, perror, status);

-        //TODO: we need a real fix - see #6895 / #6896
-        noParse = FALSE;
-        if (tag == URBNF_SPELLOUT) {
-            const char *lang = alocale.getLanguage();
-            for (int32_t i = 0; NO_SPELLOUT_PARSE_LANGUAGES[i] != NULL; i++) {
-                if (uprv_strcmp(lang, NO_SPELLOUT_PARSE_LANGUAGES[i]) == 0) {
-                    noParse = TRUE;
-                    break;
-                }
-            }
-        }
-        //TODO: end
-
        ures_close(ruleSets);
        ures_close(rbnfRules);
    }
@ -830,9 +808,6 @@ RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
    UParseError perror;
    init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);

-    //TODO: remove below when we fix the parse bug - See #6895 / #6896
-    noParse = rhs.noParse;
-
    return *this;
 }

@ -859,9 +834,6 @@ RuleBasedNumberFormat::clone(void) const
        result = 0;
    } else {
        result->lenient = lenient;
-
-        //TODO: remove below when we fix the parse bug - See #6895 / #6896
-        result->noParse = noParse;
    }
    return result;
 }
@ -1160,13 +1132,6 @@ RuleBasedNumberFormat::parse(const UnicodeString& text,
                             Formattable& result,
                             ParsePosition& parsePosition) const
 {
-    //TODO: We need a real fix.  See #6895 / #6896
-    if (noParse) {
-        // skip parsing
-        parsePosition.setErrorIndex(0);
-        return;
-    }
-
    if (!ruleSets) {
        parsePosition.setErrorIndex(0);
        return;
--- a/icu4c/source/i18n/unicode/rbnf.h
+++ b/icu4c/source/i18n/unicode/rbnf.h
@ -1018,10 +1018,6 @@ private:
    UBool lenient;
    UnicodeString* lenientParseRules;
    LocalizationInfo* localizations;
-
-    // Temporary workaround - when noParse is true, do noting in parse.
-    // TODO: We need a real fix - see #6895/#6896
-    UBool noParse;
 };

 // ---------------