ICU-22979 Support inverse rule for [] span in RBNF

This commit is contained in:
George Rhoten 2025-01-07 15:35:18 -08:00
parent 13a5e29644
commit 2d6a4185c3
11 changed files with 697 additions and 474 deletions

View file

@ -152,7 +152,7 @@ NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions,
UnicodeString& description = descriptions[index]; // !!! make sure index is valid
if (description.length() == 0) {
if (description.isEmpty()) {
// throw new IllegalArgumentException("Empty rule set description");
status = U_PARSE_ERROR;
return;
@ -177,16 +177,16 @@ NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions,
name.setTo(UNICODE_STRING_SIMPLE("%default"));
}
if (description.length() == 0) {
if (description.isEmpty()) {
// throw new IllegalArgumentException("Empty rule set description");
status = U_PARSE_ERROR;
}
fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
if ( name.endsWith(gNoparse,8) ) {
if (name.endsWith(gNoparse, 8)) {
fIsParseable = false;
name.truncate(name.length()-8); // remove the @noparse from the name
name.truncate(name.length() - 8); // remove the @noparse from the name
}
// all of the other members of NFRuleSet are initialized

View file

@ -64,6 +64,7 @@ NFRule::~NFRule()
static const char16_t gLeftBracket = 0x005b;
static const char16_t gRightBracket = 0x005d;
static const char16_t gVerticalLine = 0x007C;
static const char16_t gColon = 0x003a;
static const char16_t gZero = 0x0030;
static const char16_t gNine = 0x0039;
@ -146,6 +147,7 @@ NFRule::makeRules(UnicodeString& description,
// then it's really shorthand for two rules (with one exception)
LocalPointer<NFRule> rule2;
UnicodeString sbuf;
int32_t orElseOp = description.indexOf(gVerticalLine);
// we'll actually only split the rule into two rules if its
// base value is an even multiple of its divisor (or it's one
@ -193,9 +195,13 @@ NFRule::makeRules(UnicodeString& description,
rule2->radix = rule1->radix;
rule2->exponent = rule1->exponent;
// rule2's rule text omits the stuff in brackets: initialize
// its rule text and substitutions accordingly
// By default, rule2's rule text omits the stuff in brackets,
// unless it contains a | between the brackets.
// Initialize its rule text and substitutions accordingly.
sbuf.append(description, 0, brack1);
if (orElseOp >= 0) {
sbuf.append(description, orElseOp + 1, brack2 - orElseOp - 1);
}
if (brack2 + 1 < description.length()) {
sbuf.append(description, brack2 + 1, description.length() - brack2 - 1);
}
@ -206,7 +212,12 @@ NFRule::makeRules(UnicodeString& description,
// the brackets themselves: initialize _its_ rule text and
// substitutions accordingly
sbuf.setTo(description, 0, brack1);
sbuf.append(description, brack1 + 1, brack2 - brack1 - 1);
if (orElseOp >= 0) {
sbuf.append(description, brack1 + 1, orElseOp - brack1 - 1);
}
else {
sbuf.append(description, brack1 + 1, brack2 - brack1 - 1);
}
if (brack2 + 1 < description.length()) {
sbuf.append(description, brack2 + 1, description.length() - brack2 - 1);
}
@ -404,7 +415,7 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status)
// finally, if the rule body begins with an apostrophe, strip it off
// (this is generally used to put whitespace at the beginning of
// a rule's rule text)
if (description.length() > 0 && description.charAt(0) == gTick) {
if (!description.isEmpty() && description.charAt(0) == gTick) {
description.removeBetween(0, 1);
}

View file

@ -1568,12 +1568,12 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* locali
// divide up the descriptions into individual rule-set descriptions
// and store them in a temporary array. At each step, we also
// new up a rule set, but all this does is initialize its name
// create a rule set, but all this does is initialize its name
// and remove it from its description. We can't actually parse
// the rest of the descriptions and finish initializing everything
// because we have to know the names and locations of all the rule
// sets before we can actually set everything up
if(!numRuleSets) {
if (!numRuleSets) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
@ -1616,9 +1616,9 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* locali
// last public rule set, no matter what the localization data says.
initDefaultRuleSet();
// finally, we can go back through the temporary descriptions
// list and finish setting up the substructure (and we throw
// away the temporary descriptions as we go)
// Now that we know all the rule names, we can go back through
// the temporary descriptions list and finish setting up the substructure
// (and we throw away the temporary descriptions as we go)
{
for (int i = 0; i < numRuleSets; i++) {
fRuleSets[i]->parseRules(ruleSetDescriptions[i], status);
@ -1706,10 +1706,13 @@ RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
UnicodeString result;
int start = 0;
while (start != -1 && start < description.length()) {
// seek to the first non-whitespace character...
UChar ch;
while (start < description.length()) {
// Seek to the first non-whitespace character...
// If the first non-whitespace character is semicolon, skip it and continue
while (start < description.length()
&& PatternProps::isWhiteSpace(description.charAt(start))) {
&& (PatternProps::isWhiteSpace(ch = description.charAt(start)) || ch == gSemiColon))
{
++start;
}
@ -1720,20 +1723,16 @@ RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
// or if we don't find a semicolon, just copy the rest of
// the string into the result
result.append(description, start, description.length() - start);
start = -1;
break;
}
else if (p < description.length()) {
result.append(description, start, p + 1 - start);
start = p + 1;
}
// when we get here, we've seeked off the end of the string, and
// when we get here from the else-if, start has been advanced past the
// semicolon we just copied; the loop condition (start < description.length())
// then ends the loop once we've moved off the end of the string, while the
// break above makes sure the last rule in the description is still copied
else {
start = -1;
}
}
description.setTo(result);

View file

@ -88,23 +88,24 @@ enum URBNFRuleSetTag {
};
/**
* The RuleBasedNumberFormat class formats numbers according to a set of rules. This number formatter is
* typically used for spelling out numeric values in words (e.g., 25,3476 as
* &quot;twenty-five thousand three hundred seventy-six&quot; or &quot;vingt-cinq mille trois
* The RuleBasedNumberFormat class formats numbers according to a set of rules.
*
* <p>This number formatter is typically used for spelling out numeric values in words (e.g., 25,376
* as &quot;twenty-five thousand three hundred seventy-six&quot; or &quot;vingt-cinq mille trois
* cents soixante-seize&quot; or
* &quot;f&uuml;nfundzwanzigtausenddreihundertsechsundsiebzig&quot;), but can also be used for
* other complicated formatting tasks, such as formatting a number of seconds as hours,
* minutes and seconds (e.g., 3,730 as &quot;1:02:10&quot;).
* minutes and seconds (e.g., 3,730 as &quot;1:02:10&quot;).</p>
*
* <p>The resources contain three predefined formatters for each locale: spellout, which
* spells out a value in words (123 is &quot;one hundred twenty-three&quot;); ordinal, which
* appends an ordinal suffix to the end of a numeral (123 is &quot;123rd&quot;); and
* duration, which shows a duration in seconds as hours, minutes, and seconds (123 is
* &quot;2:03&quot;).&nbsp; The client can also define more specialized <tt>RuleBasedNumberFormat</tt>s
* &quot;2:03&quot;).&nbsp; The client can also define more specialized <code>RuleBasedNumberFormat</code>s
* by supplying programmer-defined rule sets.</p>
*
* <p>The behavior of a <tt>RuleBasedNumberFormat</tt> is specified by a textual description
* that is either passed to the constructor as a <tt>String</tt> or loaded from a resource
* <p>The behavior of a <code>RuleBasedNumberFormat</code> is specified by a textual description
* that is either passed to the constructor as a <code>String</code> or loaded from a resource
* bundle. In its simplest form, the description consists of a semicolon-delimited list of <em>rules.</em>
* Each rule has a string of output text and a value or range of values it is applicable to.
* In a typical spellout rule set, the first twenty rules are the words for the numbers from
@ -116,7 +117,8 @@ enum URBNFRuleSetTag {
* <p>For larger numbers, we can use the preceding set of rules to format the ones place, and
* we only have to supply the words for the multiples of 10:</p>
*
* <pre> 20: twenty[-&gt;&gt;];
* <pre>
* 20: twenty[-&gt;&gt;];
* 30: thirty[-&gt;&gt;];
* 40: forty[-&gt;&gt;];
* 50: fifty[-&gt;&gt;];
@ -137,7 +139,8 @@ enum URBNFRuleSetTag {
* <p>For even larger numbers, we can actually look up several parts of the number in the
* list:</p>
*
* <pre>100: &lt;&lt; hundred[ &gt;&gt;];</pre>
* <pre>
* 100: &lt;&lt; hundred[ &gt;&gt;];</pre>
*
* <p>The &quot;&lt;&lt;&quot; represents a new kind of substitution. The &lt;&lt; isolates
* the hundreds digit (and any digits to its left), formats it using this same rule set, and
@ -155,13 +158,15 @@ enum URBNFRuleSetTag {
*
* <p>This rule covers values up to 999, at which point we add another rule:</p>
*
* <pre>1000: &lt;&lt; thousand[ &gt;&gt;];</pre>
* <pre>
* 1000: &lt;&lt; thousand[ &gt;&gt;];</pre>
*
* <p>Again, the meanings of the brackets and substitution tokens shift because the rule's
* base value is a higher power of 10, changing the rule's divisor. This rule can actually be
* used all the way up to 999,999. This allows us to finish out the rules as follows:</p>
*
* <pre> 1,000,000: &lt;&lt; million[ &gt;&gt;];
* <pre>
* 1,000,000: &lt;&lt; million[ &gt;&gt;];
* 1,000,000,000: &lt;&lt; billion[ &gt;&gt;];
* 1,000,000,000,000: &lt;&lt; trillion[ &gt;&gt;];
* 1,000,000,000,000,000: OUT OF RANGE!;</pre>
@ -177,30 +182,30 @@ enum URBNFRuleSetTag {
* <p>To see how these rules actually work in practice, consider the following example:
* Formatting 25,340 with this rule set would work like this:</p>
*
* <table border="0" width="100%">
* <table style="border-collapse: collapse;">
* <tr>
* <td><strong>&lt;&lt; thousand &gt;&gt;</strong></td>
* <td>[the rule whose base value is 1,000 is applicable to 25,340]</td>
* <td style="width: 257; vertical-align: top;"><strong>&lt;&lt; thousand &gt;&gt;</strong></td>
* <td style="width: 340; vertical-align: top;">[the rule whose base value is 1,000 is applicable to 25,340]</td>
* </tr>
* <tr>
* <td><strong>twenty-&gt;&gt;</strong> thousand &gt;&gt;</td>
* <td>[25,340 over 1,000 is 25. The rule for 20 applies.]</td>
* <td style="width: 257; vertical-align: top;"><strong>twenty-&gt;&gt;</strong> thousand &gt;&gt;</td>
* <td style="width: 340; vertical-align: top;">[25,340 over 1,000 is 25. The rule for 20 applies.]</td>
* </tr>
* <tr>
* <td>twenty-<strong>five</strong> thousand &gt;&gt;</td>
* <td>[25 mod 10 is 5. The rule for 5 is &quot;five.&quot;</td>
* <td style="width: 257; vertical-align: top;">twenty-<strong>five</strong> thousand &gt;&gt;</td>
* <td style="width: 340; vertical-align: top;">[25 mod 10 is 5. The rule for 5 is &quot;five.&quot;]</td>
* </tr>
* <tr>
* <td>twenty-five thousand <strong>&lt;&lt; hundred &gt;&gt;</strong></td>
* <td>[25,340 mod 1,000 is 340. The rule for 100 applies.]</td>
* <td style="width: 257; vertical-align: top;">twenty-five thousand <strong>&lt;&lt; hundred &gt;&gt;</strong></td>
* <td style="width: 340; vertical-align: top;">[25,340 mod 1,000 is 340. The rule for 100 applies.]</td>
* </tr>
* <tr>
* <td>twenty-five thousand <strong>three</strong> hundred &gt;&gt;</td>
* <td>[340 over 100 is 3. The rule for 3 is &quot;three.&quot;]</td>
* <td style="width: 257; vertical-align: top;">twenty-five thousand <strong>three</strong> hundred &gt;&gt;</td>
* <td style="width: 340; vertical-align: top;">[340 over 100 is 3. The rule for 3 is &quot;three.&quot;]</td>
* </tr>
* <tr>
* <td>twenty-five thousand three hundred <strong>forty</strong></td>
* <td>[340 mod 100 is 40. The rule for 40 applies. Since 40 divides
* <td style="width: 257; vertical-align: top;">twenty-five thousand three hundred <strong>forty</strong></td>
* <td style="width: 340; vertical-align: top;">[340 mod 100 is 40. The rule for 40 applies. Since 40 divides
* evenly by 10, the hyphen and substitution in the brackets are omitted.]</td>
* </tr>
* </table>
@ -237,20 +242,20 @@ enum URBNFRuleSetTag {
*
* <hr>
*
* <p>The description of a <tt>RuleBasedNumberFormat</tt>'s behavior consists of one or more <em>rule
* <p>The description of a <code>RuleBasedNumberFormat</code>'s behavior consists of one or more <em>rule
* sets.</em> Each rule set consists of a name, a colon, and a list of <em>rules.</em> A rule
* set name must begin with a % sign. Rule sets with names that begin with a single % sign
* are <em>public:</em> the caller can specify that they be used to format and parse numbers.
* Rule sets with names that begin with %% are <em>private:</em> they exist only for the use
* of other rule sets. If a formatter only has one rule set, the name may be omitted.</p>
*
* <p>The user can also specify a special &quot;rule set&quot; named <tt>%%lenient-parse</tt>.
* The body of <tt>%%lenient-parse</tt> isn't a set of number-formatting rules, but a <tt>RuleBasedCollator</tt>
* <p>The user can also specify a special &quot;rule set&quot; named <code>%%lenient-parse</code>.
* The body of <code>%%lenient-parse</code> isn't a set of number-formatting rules, but a <code>RuleBasedCollator</code>
* description which is used to define equivalences for lenient parsing. For more information
* on the syntax, see <tt>RuleBasedCollator</tt>. For more information on lenient parsing,
* see <tt>setLenientParse()</tt>. <em>Note:</em> symbols that have syntactic meaning
* on the syntax, see <code>RuleBasedCollator</code>. For more information on lenient parsing,
* see <code>setLenientParse()</code>. <em>Note:</em> symbols that have syntactic meaning
* in collation rules, such as '&amp;', have no particular meaning when appearing outside
* of the <tt>lenient-parse</tt> rule set.</p>
* of the <code>lenient-parse</code> rule set.</p>
*
* <p>The body of a rule set consists of an ordered, semicolon-delimited list of <em>rules.</em>
* Internally, every rule has a base value, a divisor, rule text, and zero, one, or two <em>substitutions.</em>
@ -260,42 +265,46 @@ enum URBNFRuleSetTag {
* <p>A rule descriptor can take one of the following forms (text in <em>italics</em> is the
* name of a token):</p>
*
* <table border="0" width="100%">
* <table style="border-collapse: collapse;">
* <tr>
* <td><em>bv</em>:</td>
* <td><em>bv</em> specifies the rule's base value. <em>bv</em> is a decimal
* <th style="padding-left: 1em; padding-right: 1em;">Descriptor</th>
* <th>Description</th>
* </tr>
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;"><em>bv</em>:</td>
* <td style="vertical-align: top;"><em>bv</em> specifies the rule's base value. <em>bv</em> is a decimal
* number expressed using ASCII digits. <em>bv</em> may contain spaces, period, and commas,
* which are ignored. The rule's divisor is the highest power of 10 less than or equal to
* the base value.</td>
* </tr>
* <tr>
* <td><em>bv</em>/<em>rad</em>:</td>
* <td><em>bv</em> specifies the rule's base value. The rule's divisor is the
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;"><em>bv</em>/<em>rad</em>:</td>
* <td style="vertical-align: top;"><em>bv</em> specifies the rule's base value. The rule's divisor is the
* highest power of <em>rad</em> less than or equal to the base value.</td>
* </tr>
* <tr>
* <td><em>bv</em>&gt;:</td>
* <td><em>bv</em> specifies the rule's base value. To calculate the divisor,
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;"><em>bv</em>&gt;:</td>
* <td style="vertical-align: top;"><em>bv</em> specifies the rule's base value. To calculate the divisor,
* let the radix be 10, and the exponent be the highest exponent of the radix that yields a
* result less than or equal to the base value. Every &gt; character after the base value
* decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
* raised to the power of the exponent; otherwise, the divisor is 1.</td>
* </tr>
* <tr>
* <td><em>bv</em>/<em>rad</em>&gt;:</td>
* <td><em>bv</em> specifies the rule's base value. To calculate the divisor,
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;"><em>bv</em>/<em>rad</em>&gt;:</td>
* <td style="vertical-align: top;"><em>bv</em> specifies the rule's base value. To calculate the divisor,
* let the radix be <em>rad</em>, and the exponent be the highest exponent of the radix that
* yields a result less than or equal to the base value. Every &gt; character after the radix
* decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
* raised to the power of the exponent; otherwise, the divisor is 1.</td>
* </tr>
* <tr>
* <td>-x:</td>
* <td>The rule is a negative-number rule.</td>
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;">-x:</td>
* <td style="vertical-align: top;">The rule is a negative-number rule.</td>
* </tr>
* <tr>
* <td>x.x:</td>
* <td>The rule is an <em>improper fraction rule</em>. If the full stop in
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;">x.x:</td>
* <td style="vertical-align: top;">The rule is an <em>improper fraction rule</em>. If the full stop in
* the middle of the rule name is replaced with the decimal point
* that is used in the language or DecimalFormatSymbols, then that rule will
* have precedence when formatting and parsing this rule. For example, some
@ -304,39 +313,39 @@ enum URBNFRuleSetTag {
* handle the decimal point that matches the language's natural spelling of
* the punctuation of either the full stop or comma.</td>
* </tr>
* <tr>
* <td>0.x:</td>
* <td>The rule is a <em>proper fraction rule</em>. If the full stop in
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;">0.x:</td>
* <td style="vertical-align: top;">The rule is a <em>proper fraction rule</em>. If the full stop in
* the middle of the rule name is replaced with the decimal point
* that is used in the language or DecimalFormatSymbols, then that rule will
* have precedence when formatting and parsing this rule. For example, some
* languages use the comma, and can thus be written as 0,x instead. For example,
* you can use "0.x: point &gt;&gt;;0,x: comma &gt;&gt;;" to
* handle the decimal point that matches the language's natural spelling of
* the punctuation of either the full stop or comma.</td>
* the punctuation of either the full stop or comma.</td>
* </tr>
* <tr>
* <td>x.0:</td>
* <td>The rule is a <em>default rule</em>. If the full stop in
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;">x.0:</td>
* <td style="vertical-align: top;">The rule is a <em>default rule</em>. If the full stop in
* the middle of the rule name is replaced with the decimal point
* that is used in the language or DecimalFormatSymbols, then that rule will
* have precedence when formatting and parsing this rule. For example, some
* languages use the comma, and can thus be written as x,0 instead. For example,
* you can use "x.0: &lt;&lt; point;x,0: &lt;&lt; comma;" to
* handle the decimal point that matches the language's natural spelling of
* the punctuation of either the full stop or comma.</td>
* the punctuation of either the full stop or comma.</td>
* </tr>
* <tr>
* <td>Inf:</td>
* <td>The rule for infinity.</td>
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;">Inf:</td>
* <td style="vertical-align: top;">The rule for infinity.</td>
* </tr>
* <tr>
* <td>NaN:</td>
* <td>The rule for an IEEE 754 NaN (not a number).</td>
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;">NaN:</td>
* <td style="vertical-align: top;">The rule for an IEEE 754 NaN (not a number).</td>
* </tr>
* <tr>
* <td><em>nothing</em></td>
* <td>If the rule's rule descriptor is left out, the base value is one plus the
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;"><em>nothing</em></td>
* <td style="vertical-align: top;">If the rule's rule descriptor is left out, the base value is one plus the
* preceding rule's base value (or zero if this is the first rule in the list) in a normal
* rule set.&nbsp; In a fraction rule set, the base value is the same as the preceding rule's
* base value.</td>
@ -352,8 +361,8 @@ enum URBNFRuleSetTag {
* algorithms: If the rule set is a regular rule set, do the following:
*
* <ul>
* <li>If the rule set includes a default rule (and the number was passed in as a <tt>double</tt>),
* use the default rule.&nbsp; (If the number being formatted was passed in as a <tt>long</tt>,
* <li>If the rule set includes a default rule (and the number was passed in as a <code>double</code>),
* use the default rule.&nbsp; (If the number being formatted was passed in as a <code>long</code>,
* the default rule is ignored.)</li>
* <li>If the number is negative, use the negative-number rule.</li>
* <li>If the number has a fractional part and is greater than 1, use the improper fraction
@ -400,42 +409,43 @@ enum URBNFRuleSetTag {
*
* <p>The meanings of the substitution token characters are as follows:</p>
*
* <table border="0" width="100%">
* <table style="border-collapse: collapse;">
* <tr>
* <td>&gt;&gt;</td>
* <td>in normal rule</td>
* <th>Syntax</th>
* <th>Usage</th>
* <th>Description</th>
* </tr>
* <tr style="border-top: 1px solid black;">
* <td style="white-space: nowrap;" rowspan="4">&gt;&gt;</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in normal rule</td>
* <td>Divide the number by the rule's divisor and format the remainder</td>
* </tr>
* <tr>
* <td></td>
* <td>in negative-number rule</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in negative-number rule</td>
* <td>Find the absolute value of the number and format the result</td>
* </tr>
* <tr>
* <td></td>
* <td>in fraction or default rule</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in fraction or default rule</td>
* <td>Isolate the number's fractional part and format it.</td>
* </tr>
* <tr>
* <td></td>
* <td>in rule in fraction rule set</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in rule in fraction rule set</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td>&gt;&gt;&gt;</td>
* <td>in normal rule</td>
* <tr style="border-top: 1px solid black;">
* <td style="white-space: nowrap;" rowspan="2">&gt;&gt;&gt;</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in normal rule</td>
* <td>Divide the number by the rule's divisor and format the remainder,
* but bypass the normal rule-selection process and just use the
* rule that precedes this one in this rule list.</td>
* </tr>
* <tr>
* <td></td>
* <td>in all other rules</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in all other rules</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td>&lt;&lt;</td>
* <td>in normal rule</td>
* <tr style="border-top: 1px solid black;">
* <td style="white-space: nowrap;" rowspan="4">&lt;&lt;</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in normal rule</td>
* <td>Divide the number by the rule's divisor, perform floor() on the quotient,
* and format the resulting value.<br>
* If there is a DecimalFormat pattern between the &lt; characters and the
@ -448,73 +458,93 @@ enum URBNFRuleSetTag {
* </td>
* </tr>
* <tr>
* <td></td>
* <td>in negative-number rule</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in negative-number rule</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td></td>
* <td>in fraction or default rule</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in fraction or default rule</td>
* <td>Isolate the number's integral part and format it.</td>
* </tr>
* <tr>
* <td></td>
* <td>in rule in fraction rule set</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in rule in fraction rule set</td>
* <td>Multiply the number by the rule's base value and format the result.</td>
* </tr>
* <tr>
* <td>==</td>
* <td>in all rule sets</td>
* <tr style="border-top: 1px solid black;">
* <td style="white-space: nowrap;">==</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in all rule sets</td>
* <td>Format the number unchanged</td>
* </tr>
* <tr>
* <td>[]</td>
* <td>in normal rule</td>
* <td>Omit the optional text if the number is an even multiple of the rule's divisor</td>
* <tr style="border-top: 1px solid black;">
* <td style="white-space: nowrap;" rowspan="6">[]<br/>[|]</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in normal rule</td>
* <td>
* <ul>
* <li>When the number is not an even multiple of the rule's divisor, use the text and rules between the beginning square bracket,
* and the end square bracket or the | symbol.</li>
* <li>When the number is an even multiple of the rule's divisor, and no | symbol is used, omit the text.</li>
* <li>When the number is an even multiple of the rule's divisor, and the | symbol is used, use the text and rules between the | symbol,
* and the end square bracket.</li>
* </ul>
* </td>
* </tr>
* <tr>
* <td></td>
* <td>in negative-number rule</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in improper-fraction rule</td>
* <td>This syntax is the same as specifying both an x.x rule and a 0.x rule.
* <ul>
* <li>When the number is not between 0 and 1, use the text and rules between the beginning square bracket,
* and the end square bracket or the | symbol.</li>
* <li>When the number is between 0 and 1, and no | symbol is used, omit the text.</li>
* <li>When the number is between 0 and 1, and the | symbol is used, use the text and rules between the | symbol,
* and the end square bracket.</li>
* </ul>
* </td>
* </tr>
* <tr>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in default rule</td>
* <td>This syntax is the same as specifying both an x.x rule and an x.0 rule.
* <ul>
* <li>When the number is not an integer, use the text and rules between the beginning square bracket,
* and the end square bracket or the | symbol.</li>
* <li>When the number is an integer, and no | symbol is used, omit the text.</li>
* <li>When the number is an integer, and the | symbol is used, use the text and rules between the | symbol,
* and the end square bracket.</li>
* </ul>
* </td>
* </tr>
* <tr>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in rule in fraction rule set</td>
* <td>
* <ul>
* <li>When multiplying the number by the rule's base value does not yield 1, use the text and rules between the beginning square bracket,
* and the end square bracket or the | symbol.</li>
* <li>When multiplying the number by the rule's base value yields 1, and no | symbol is used, omit the text.</li>
* <li>When multiplying the number by the rule's base value yields 1, and the | symbol is used, use the text and rules between the | symbol,
* and the end square bracket.</li>
* </ul>
* </td>
* </tr>
* <tr>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in proper-fraction rule</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td></td>
* <td>in improper-fraction rule</td>
* <td>Omit the optional text if the number is between 0 and 1 (same as specifying both an
* x.x rule and a 0.x rule)</td>
* </tr>
* <tr>
* <td></td>
* <td>in default rule</td>
* <td>Omit the optional text if the number is an integer (same as specifying both an x.x
* rule and an x.0 rule)</td>
* </tr>
* <tr>
* <td></td>
* <td>in proper-fraction rule</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in negative-number rule</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td></td>
* <td>in rule in fraction rule set</td>
* <td>Omit the optional text if multiplying the number by the rule's base value yields 1.</td>
* </tr>
* <tr>
* <td width="37">$(cardinal,<i>plural syntax</i>)$</td>
* <td width="23"></td>
* <td width="165" valign="top">in all rule sets</td>
* <tr style="border-top: 1px solid black;">
* <td style="white-space: nowrap;">$(cardinal,<i>plural syntax</i>)$</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in all rule sets</td>
* <td>This provides the ability to choose a word based on the number divided by the radix to the power of the
* exponent of the base value for the specified locale, which is normally equivalent to the &lt;&lt; value.
* This uses the cardinal plural rules from PluralFormat. All strings used in the plural format are treated
* This uses the cardinal plural rules from {@link PluralFormat}. All strings used in the plural format are treated
* as the same base value for parsing.</td>
* </tr>
* <tr>
* <td width="37">$(ordinal,<i>plural syntax</i>)$</td>
* <td width="23"></td>
* <td width="165" valign="top">in all rule sets</td>
* <tr style="border-top: 1px solid black;">
* <td style="white-space: nowrap;">$(ordinal,<i>plural syntax</i>)$</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in all rule sets</td>
* <td>This provides the ability to choose a word based on the number divided by the radix to the power of the
* exponent of the base value for the specified locale, which is normally equivalent to the &lt;&lt; value.
* This uses the ordinal plural rules from PluralFormat. All strings used in the plural format are treated
* This uses the ordinal plural rules from {@link PluralFormat}. All strings used in the plural format are treated
* as the same base value for parsing.</td>
* </tr>
* </table>
@ -522,22 +552,25 @@ enum URBNFRuleSetTag {
* <p>The substitution descriptor (i.e., the text between the token characters) may take one
* of three forms:</p>
*
* <table border="0" width="100%">
* <table style="border-collapse: collapse;">
* <tr>
* <td>a rule set name</td>
* <th>Descriptor</th>
* <th>Description</th>
* </tr>
* <tr>
* <td style="vertical-align: top;">a rule set name</td>
* <td>Perform the mathematical operation on the number, and format the result using the
* named rule set.</td>
* </tr>
* <tr>
* <td>a DecimalFormat pattern</td>
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;">a DecimalFormat pattern</td>
* <td>Perform the mathematical operation on the number, and format the result using a
* DecimalFormat with the specified pattern.&nbsp; The pattern must begin with 0 or #.</td>
* </tr>
* <tr>
* <td>nothing</td>
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;">nothing</td>
* <td>Perform the mathematical operation on the number, and format the result using the rule
* set containing the current rule, except:
* <ul>
* set containing the current rule, except:<ul>
* <li>You can't have an empty substitution descriptor with a == substitution.</li>
* <li>If you omit the substitution descriptor in a &gt;&gt; substitution in a fraction rule,
* format the result one digit at a time using the rule set containing the current rule.</li>

View file

@ -81,8 +81,9 @@ void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name,
TESTCASE(29, TestNumberingSystem);
TESTCASE(30, TestDFRounding);
TESTCASE(31, TestMemoryLeak22899);
TESTCASE(32, TestInfiniteRecursion);
TESTCASE(33, TestParseRuleDescriptorOverflow23002);
TESTCASE(32, TestParseRuleDescriptorOverflow23002);
TESTCASE(33, TestInfiniteRecursion);
TESTCASE(34, testOmissionReplacementWithPluralRules);
#else
TESTCASE(0, TestRBNFDisabled);
#endif
@ -2661,6 +2662,97 @@ IntlTestRBNF::TestInfiniteRecursion() {
}
}
/**
* This test is a little contrived for English, but the grammar is relevant for several languages, including:
* Latin, Germanic, Slavic and Indic.
* It's pretty common, especially for ordinals, to use different words as a magnitude unit and when it's the final word.
* Several languages need grammatical agreement between the final and non-final magnitude unit
* with the numerical quantity before the unit. This test is the equivalent seen in other languages.
*/
void
IntlTestRBNF::testOmissionReplacementWithPluralRules() {
UnicodeString rules("%cardinal:\n"
"-x: minus >>;\n"
"x.x: << point >>;\n"
"Inf: infinite;\n"
"NaN: not a number;\n"
"zero; one; two; three; four; five; six; seven; eight; nine;\n"
"ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen; seventeen; eighteen; nineteen;\n"
"20: twenty[->>];\n"
"30: thirty[->>];\n"
"40: forty[->>];\n"
"50: fifty[->>];\n"
"60: sixty[->>];\n"
"70: seventy[->>];\n"
"80: eighty[->>];\n"
"90: ninety[->>];\n"
"100: << hundred[ >>];\n"
"1000: << thousand[ >>];\n"
"1000000: << million[ >>];\n"
"1000000000: << billion[ >>];\n"
"1000000000000: << trillion[ >>];\n"
"1000000000000000: =#,##0=;\n"
"%ordinal:\n"
"-x: minus >>;\n"
"x.x: =#,##0.#=;\n"
"Inf: infinitieth;\n"
"zeroth; first; second; third; fourth; fifth; sixth; seventh; eighth; ninth;\n"
"tenth; eleventh; twelfth;\n"
"13: =%cardinal=th;\n"
"20: twent[y->>|ieth];\n"
"30: thirt[y->>|ieth];\n"
"40: fort[y->>|ieth];\n"
"50: fift[y->>|ieth];\n"
"60: sixt[y->>|ieth];\n"
"70: sevent[y->>|ieth];\n"
"80: eight[y->>|ieth];\n"
"90: ninet[y->>|ieth];\n"
"100: <%cardinal< [$(cardinal,one{hundred}other{hundreds})$ >>|$(cardinal,one{hundredth}other{hundredths})$];\n"
"1000: <%cardinal< [$(cardinal,one{thousand}other{thousands})$ >>|$(cardinal,one{thousandth}other{thousandths})$];\n"
"1000000: <%cardinal< [$(cardinal,one{million}other{millions})$ >>|$(cardinal,one{millionth}other{millionths})$];\n"
"1000000000: <%cardinal< [$(cardinal,one{billion}other{billions})$ >>|$(cardinal,one{billionth}other{billionths})$];\n"
"1000000000000: <%cardinal< [$(cardinal,one{trillion}other{trillions})$ >>|$(cardinal,one{trillionth}other{trillionths})$];\n"
"1000000000000000: =#,##0=$(ordinal,one{st}two{nd}few{rd}other{th})$;");
UErrorCode status = U_ZERO_ERROR;
UParseError perror;
icu::RuleBasedNumberFormat rbnf(rules, icu::Locale::getEnglish(), perror, status);
const char * const enTestFullData[][2] = {
{"20", "twentieth"},
{"21", "twenty-first"},
{"29", "twenty-ninth"},
{"30", "thirtieth"},
{"31", "thirty-first"},
{"39", "thirty-ninth"},
{"100", "one hundredth"},
{"101", "one hundred first"},
{"200", "two hundredths"},
{"201", "two hundreds first"},
{"300", "three hundredths"},
{"301", "three hundreds first"},
{"1000", "one thousandth"},
{"1001", "one thousand first"},
{"1100", "one thousand one hundredth"},
{"1101", "one thousand one hundred first"},
{"1200", "one thousand two hundredths"},
{"1201", "one thousand two hundreds first"},
{"2000", "two thousandths"},
{"2001", "two thousands first"},
{"2100", "two thousands one hundredth"},
{"2101", "two thousands one hundred first"},
{"8000", "eight thousandths"},
{"8001", "eight thousands first"},
{"888000", "eight hundred eighty-eight thousandths"},
{"888001", "eight hundred eighty-eight thousands first"},
{"888100", "eight hundred eighty-eight thousands one hundredth"},
{"999101", "nine hundred ninety-nine thousands one hundred first"},
{"999200", "nine hundred ninety-nine thousands two hundredths"},
{"999201", "nine hundred ninety-nine thousands two hundreds first"},
{ nullptr, nullptr }
};
doTest(&rbnf, enTestFullData, false);
}
/* U_HAVE_RBNF */
#else

View file

@ -19,139 +19,139 @@
class IntlTestRBNF : public IntlTest {
public:
public:
// IntlTest override
virtual void runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) override;
// IntlTest override
virtual void runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) override;
#if U_HAVE_RBNF
/**
* Perform an API test
*/
virtual void TestAPI();
/**
* Perform an API test
*/
virtual void TestAPI();
void TestMultiplePluralRules();
void TestMultiplePluralRules();
/**
* Perform a simple spot check on the FractionalRuleSet logic
*/
virtual void TestFractionalRuleSet();
/**
* Perform a simple spot check on the FractionalRuleSet logic
*/
virtual void TestFractionalRuleSet();
#if 0
/**
* Perform API tests on llong
*/
virtual void TestLLong();
virtual void TestLLongConstructors();
virtual void TestLLongSimpleOperators();
/**
* Perform API tests on llong
*/
virtual void TestLLong();
virtual void TestLLongConstructors();
virtual void TestLLongSimpleOperators();
#endif
/**
* Perform a simple spot check on the English spellout rules
*/
void TestEnglishSpellout();
/**
* Perform a simple spot check on the English spellout rules
*/
void TestEnglishSpellout();
/**
* Perform a simple spot check on the English ordinal-abbreviation rules
*/
void TestOrdinalAbbreviations();
/**
* Perform a simple spot check on the English ordinal-abbreviation rules
*/
void TestOrdinalAbbreviations();
/**
* Perform a simple spot check on the duration-formatting rules
*/
void TestDurations();
/**
* Test that rounding works correctly on multiplier substitutions that use
* a DecimalFormat.
*/
void TestDFRounding();
/**
* Perform a simple spot check on the duration-formatting rules
*/
void TestDurations();
/**
* Perform a simple spot check on the Spanish spellout rules
*/
void TestSpanishSpellout();
/**
* Test that rounding works correctly on multiplier substitutions that use
* a DecimalFormat.
*/
void TestDFRounding();
/**
* Perform a simple spot check on the French spellout rules
*/
void TestFrenchSpellout();
/**
* Perform a simple spot check on the Spanish spellout rules
*/
void TestSpanishSpellout();
/**
* Perform a simple spot check on the Swiss French spellout rules
*/
void TestSwissFrenchSpellout();
/**
* Perform a simple spot check on the French spellout rules
*/
void TestFrenchSpellout();
/**
* Check that Belgian French matches Swiss French spellout rules
*/
void TestBelgianFrenchSpellout();
/**
* Perform a simple spot check on the Swiss French spellout rules
*/
void TestSwissFrenchSpellout();
/**
* Perform a simple spot check on the Italian spellout rules
*/
void TestItalianSpellout();
/**
* Check that Belgian French matches Swiss French spellout rules
*/
void TestBelgianFrenchSpellout();
/**
* Perform a simple spot check on the Portuguese spellout rules
*/
void TestPortugueseSpellout();
/**
* Perform a simple spot check on the Italian spellout rules
*/
void TestItalianSpellout();
/**
* Perform a simple spot check on the German spellout rules
*/
void TestGermanSpellout();
/**
* Perform a simple spot check on the Portuguese spellout rules
*/
void TestPortugueseSpellout();
/**
* Perform a simple spot check on the Thai spellout rules
*/
void TestThaiSpellout();
/**
* Perform a simple spot check on the German spellout rules
*/
void TestGermanSpellout();
/**
* Perform a simple spot check on the Norwegian (no,nb) spellout rules
*/
void TestNorwegianSpellout();
/**
* Perform a simple spot check on the Thai spellout rules
*/
void TestThaiSpellout();
/**
* Perform a simple spot check on the Swedish spellout rules
*/
void TestSwedishSpellout();
/**
* Perform a simple spot check on the Norwegian (no,nb) spellout rules
*/
void TestNorwegianSpellout();
/**
* Perform a simple spot check on small values
*/
void TestSmallValues();
/**
* Perform a simple spot check on the Swedish spellout rules
*/
void TestSwedishSpellout();
/**
* Test localizations using string data.
*/
void TestLocalizations();
/**
* Perform a simple spot check on small values
*/
void TestSmallValues();
/**
* Test that all locales construct ok.
*/
void TestAllLocales();
/**
* Test localizations using string data.
*/
void TestLocalizations();
/**
* Test that hebrew fractions format without trailing '<'
*/
void TestHebrewFraction();
/**
* Test that all locales construct ok.
*/
void TestAllLocales();
/**
* Regression test, don't truncate
* when doing multiplier substitution to a number format rule.
*/
void TestMultiplierSubstitution();
/**
* Test that hebrew fractions format without trailing '<'
*/
void TestHebrewFraction();
/**
* Test the setDecimalFormatSymbols in RBNF
*/
void TestSetDecimalFormatSymbols();
/**
* Regression test, don't truncate
* when doing multiplier substitution to a number format rule.
*/
void TestMultiplierSubstitution();
/**
* Test the plural rules in RBNF
*/
void TestPluralRules();
/**
* Test the setDecimalFormatSymbols in RBNF
*/
void TestSetDecimalFormatSymbols();
/**
* Test the plural rules in RBNF
*/
void TestPluralRules();
void TestInfinityNaN();
void TestVariableDecimalPoint();
@ -162,17 +162,18 @@ class IntlTestRBNF : public IntlTest {
void TestMinMaxIntegerDigitsIgnored();
void TestNumberingSystem();
void TestMemoryLeak22899();
void TestInfiniteRecursion();
void TestParseRuleDescriptorOverflow23002();
void TestInfiniteRecursion();
void testOmissionReplacementWithPluralRules();
protected:
virtual void doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing);
virtual void doLenientParseTest(RuleBasedNumberFormat* formatter, const char* testData[][2]);
virtual void doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing);
virtual void doLenientParseTest(RuleBasedNumberFormat* formatter, const char* testData[][2]);
/* U_HAVE_RBNF */
#else
virtual void TestRBNFDisabled();
virtual void TestRBNFDisabled();
/* U_HAVE_RBNF */
#endif

View file

@ -1951,4 +1951,92 @@ public class RbnfTest extends CoreTestFmwk {
}
}
}
/**
 * Exercises bracketed omission text that carries a replacement ("[a|b]") together with plural rules.
 *
 * <p>The %ordinal rule set below uses the text before the '|' when the rule has a remainder to format
 * (21 -&gt; "twenty-first") and the text after the '|' when the number is an exact multiple of the rule's
 * base value (20 -&gt; "twentieth"). The magnitude words additionally go through $(cardinal,...)$ plural
 * selection, so the non-final and final forms of the unit agree with the quantity in front of them.
 *
 * <p>The English output is deliberately contrived ("two hundreds first"); it stands in for languages
 * (Latin, Germanic, Slavic and Indic among others) where the magnitude word really does differ between
 * the final and non-final position, which is especially common for ordinals.
 *
 * <p>Only formatting is checked; parsing is not tested (the last argument to doTest is false).
 */
@Test
public void testOmissionReplacementWithPluralRules() {
    // {number to format, expected %ordinal output}
    String[][] expectedOrdinals = {
            {"20", "twentieth"},
            {"21", "twenty-first"},
            {"29", "twenty-ninth"},
            {"30", "thirtieth"},
            {"31", "thirty-first"},
            {"39", "thirty-ninth"},
            {"100", "one hundredth"},
            {"101", "one hundred first"},
            {"200", "two hundredths"},
            {"201", "two hundreds first"},
            {"300", "three hundredths"},
            {"301", "three hundreds first"},
            {"1000", "one thousandth"},
            {"1001", "one thousand first"},
            {"1100", "one thousand one hundredth"},
            {"1101", "one thousand one hundred first"},
            {"1200", "one thousand two hundredths"},
            {"1201", "one thousand two hundreds first"},
            {"2000", "two thousandths"},
            {"2001", "two thousands first"},
            {"2100", "two thousands one hundredth"},
            {"2101", "two thousands one hundred first"},
            {"8000", "eight thousandths"},
            {"8001", "eight thousands first"},
            {"888000", "eight hundred eighty-eight thousandths"},
            {"888001", "eight hundred eighty-eight thousands first"},
            {"888100", "eight hundred eighty-eight thousands one hundredth"},
            {"999101", "nine hundred ninety-nine thousands one hundred first"},
            {"999200", "nine hundred ninety-nine thousands two hundredths"},
            {"999201", "nine hundred ninety-nine thousands two hundreds first"},
    };

    // %cardinal is declared first; %ordinal references it through <%cardinal< and =%cardinal=.
    final String ruleText =
            "%cardinal:\n" +
            "-x: minus >>;\n" +
            "x.x: << point >>;\n" +
            "Inf: infinite;\n" +
            "NaN: not a number;\n" +
            "zero; one; two; three; four; five; six; seven; eight; nine;\n" +
            "ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen; seventeen; eighteen; nineteen;\n" +
            "20: twenty[->>];\n" +
            "30: thirty[->>];\n" +
            "40: forty[->>];\n" +
            "50: fifty[->>];\n" +
            "60: sixty[->>];\n" +
            "70: seventy[->>];\n" +
            "80: eighty[->>];\n" +
            "90: ninety[->>];\n" +
            "100: << hundred[ >>];\n" +
            "1000: << thousand[ >>];\n" +
            "1000000: << million[ >>];\n" +
            "1000000000: << billion[ >>];\n" +
            "1000000000000: << trillion[ >>];\n" +
            "1000000000000000: =#,##0=;\n" +
            "%ordinal:\n" +
            "-x: minus >>;\n" +
            "x.x: =#,##0.#=;\n" +
            "Inf: infinitieth;\n" +
            "zeroth; first; second; third; fourth; fifth; sixth; seventh; eighth; ninth;\n" +
            "tenth; eleventh; twelfth;\n" +
            "13: =%cardinal=th;\n" +
            "20: twent[y->>|ieth];\n" +
            "30: thirt[y->>|ieth];\n" +
            "40: fort[y->>|ieth];\n" +
            "50: fift[y->>|ieth];\n" +
            "60: sixt[y->>|ieth];\n" +
            "70: sevent[y->>|ieth];\n" +
            "80: eight[y->>|ieth];\n" +
            "90: ninet[y->>|ieth];\n" +
            "100: <%cardinal< [$(cardinal,one{hundred}other{hundreds})$ >>|$(cardinal,one{hundredth}other{hundredths})$];\n" +
            "1000: <%cardinal< [$(cardinal,one{thousand}other{thousands})$ >>|$(cardinal,one{thousandth}other{thousandths})$];\n" +
            "1000000: <%cardinal< [$(cardinal,one{million}other{millions})$ >>|$(cardinal,one{millionth}other{millionths})$];\n" +
            "1000000000: <%cardinal< [$(cardinal,one{billion}other{billions})$ >>|$(cardinal,one{billionth}other{billionths})$];\n" +
            "1000000000000: <%cardinal< [$(cardinal,one{trillion}other{trillions})$ >>|$(cardinal,one{trillionth}other{trillionths})$];\n" +
            "1000000000000000: =#,##0=$(ordinal,one{st}two{nd}few{rd}other{th})$;";

    RuleBasedNumberFormat formatter = new RuleBasedNumberFormat(ruleText, ULocale.US);
    doTest(formatter, expectedOrdinals, false);
}
}

View file

@ -162,6 +162,7 @@ final class NFRule {
// then it's really shorthand for two rules (with one exception)
NFRule rule2 = null;
StringBuilder sbuf = new StringBuilder();
int orElseOp = description.indexOf('|');
// we'll actually only split the rule into two rules if its
// base value is an even multiple of its divisor (or it's one
@ -203,11 +204,15 @@ final class NFRule {
rule2.radix = rule1.radix;
rule2.exponent = rule1.exponent;
// rule2's rule text omits the stuff in brackets: initialize
// its rule text and substitutions accordingly
sbuf.append(description.substring(0, brack1));
// By default, rule2's rule text omits the stuff in brackets,
// unless it contains a | between the brackets.
// Initialize its rule text and substitutions accordingly
sbuf.append(description, 0, brack1);
if (orElseOp >= 0) {
sbuf.append(description, orElseOp + 1, brack2);
}
if (brack2 + 1 < description.length()) {
sbuf.append(description.substring(brack2 + 1));
sbuf.append(description, brack2 + 1, description.length());
}
rule2.extractSubstitutions(owner, sbuf.toString(), predecessor);
}
@ -216,8 +221,13 @@ final class NFRule {
// the brackets themselves: initialize _its_ rule text and
// substitutions accordingly
sbuf.setLength(0);
sbuf.append(description.substring(0, brack1));
sbuf.append(description.substring(brack1 + 1, brack2));
sbuf.append(description, 0, brack1);
if (orElseOp >= 0) {
sbuf.append(description, brack1 + 1, orElseOp);
}
else {
sbuf.append(description, brack1 + 1, brack2);
}
if (brack2 + 1 < description.length()) {
sbuf.append(description.substring(brack2 + 1));
}
@ -394,7 +404,7 @@ final class NFRule {
// finally, if the rule body begins with an apostrophe, strip it off
// (this is generally used to put whitespace at the beginning of
// a rule's rule text)
if (description.length() > 0 && description.charAt(0) == '\'') {
if (!description.isEmpty() && description.charAt(0) == '\'') {
description = description.substring(1);
}

View file

@ -105,7 +105,7 @@ final class NFRuleSet {
this.owner = owner;
String description = descriptions[index];
if (description.length() == 0) {
if (description.isEmpty()) {
throw new IllegalArgumentException("Empty rule set description");
}
@ -119,12 +119,12 @@ final class NFRuleSet {
throw new IllegalArgumentException("Rule set name doesn't end in colon");
}
else {
String name = description.substring(0, pos);
this.isParseable = !name.endsWith("@noparse");
String ruleName = description.substring(0, pos);
this.isParseable = !ruleName.endsWith("@noparse");
if (!this.isParseable) {
name = name.substring(0,name.length()-8); // Remove the @noparse from the name
ruleName = ruleName.substring(0, ruleName.length() - 8); // Remove the @noparse from the name
}
this.name = name;
this.name = ruleName;
//noinspection StatementWithEmptyBody
while (pos < description.length() && PatternProps.isWhiteSpace(description.charAt(++pos))) {
@ -140,7 +140,7 @@ final class NFRuleSet {
isParseable = true;
}
if (description.length() == 0) {
if (description.isEmpty()) {
throw new IllegalArgumentException("Empty rule set description");
}

View file

@ -32,11 +32,12 @@ import com.ibm.icu.util.UResourceBundleIterator;
/**
* <p>A class that formats numbers according to a set of rules. This number formatter is
* typically used for spelling out numeric values in words (e.g., 25,376 as
* &quot;twenty-five thousand three hundred seventy-six&quot; or &quot;vingt-cinq mille trois
* The RuleBasedNumberFormat class formats numbers according to a set of rules.
*
* <p>This number formatter is typically used for spelling out numeric values in words (e.g., 25,376
* as &quot;twenty-five thousand three hundred seventy-six&quot; or &quot;vingt-cinq mille trois
* cents soixante-seize&quot; or
* &quot;funfundzwanzigtausenddreihundertsechsundsiebzig&quot;), but can also be used for
* &quot;f&uuml;nfundzwanzigtausenddreihundertsechsundsiebzig&quot;), but can also be used for
* other complicated formatting tasks, such as formatting a number of seconds as hours,
* minutes and seconds (e.g., 3,730 as &quot;1:02:10&quot;).</p>
*
@ -44,11 +45,11 @@ import com.ibm.icu.util.UResourceBundleIterator;
* spells out a value in words (123 is &quot;one hundred twenty-three&quot;); ordinal, which
* appends an ordinal suffix to the end of a numeral (123 is &quot;123rd&quot;); and
* duration, which shows a duration in seconds as hours, minutes, and seconds (123 is
* &quot;2:03&quot;).&nbsp; The client can also define more specialized <tt>RuleBasedNumberFormat</tt>s
* &quot;2:03&quot;).&nbsp; The client can also define more specialized <code>RuleBasedNumberFormat</code>s
* by supplying programmer-defined rule sets.</p>
*
* <p>The behavior of a <tt>RuleBasedNumberFormat</tt> is specified by a textual description
* that is either passed to the constructor as a <tt>String</tt> or loaded from a resource
* <p>The behavior of a <code>RuleBasedNumberFormat</code> is specified by a textual description
* that is either passed to the constructor as a <code>String</code> or loaded from a resource
* bundle. In its simplest form, the description consists of a semicolon-delimited list of <em>rules.</em>
* Each rule has a string of output text and a value or range of values it is applicable to.
* In a typical spellout rule set, the first twenty rules are the words for the numbers from
@ -60,8 +61,9 @@ import com.ibm.icu.util.UResourceBundleIterator;
* <p>For larger numbers, we can use the preceding set of rules to format the ones place, and
* we only have to supply the words for the multiples of 10:</p>
*
* <pre>20: twenty[-&gt;&gt;];
* 30: thirty{-&gt;&gt;];
* <pre>
* 20: twenty[-&gt;&gt;];
* 30: thirty[-&gt;&gt;];
* 40: forty[-&gt;&gt;];
* 50: fifty[-&gt;&gt;];
* 60: sixty[-&gt;&gt;];
@ -81,7 +83,8 @@ import com.ibm.icu.util.UResourceBundleIterator;
* <p>For even larger numbers, we can actually look up several parts of the number in the
* list:</p>
*
* <pre>100: &lt;&lt; hundred[ &gt;&gt;];</pre>
* <pre>
* 100: &lt;&lt; hundred[ &gt;&gt;];</pre>
*
* <p>The &quot;&lt;&lt;&quot; represents a new kind of substitution. The &lt;&lt; isolates
* the hundreds digit (and any digits to its left), formats it using this same rule set, and
@ -99,13 +102,15 @@ import com.ibm.icu.util.UResourceBundleIterator;
*
* <p>This rule covers values up to 999, at which point we add another rule:</p>
*
* <pre>1000: &lt;&lt; thousand[ &gt;&gt;];</pre>
* <pre>
* 1000: &lt;&lt; thousand[ &gt;&gt;];</pre>
*
* <p>Again, the meanings of the brackets and substitution tokens shift because the rule's
* base value is a higher power of 10, changing the rule's divisor. This rule can actually be
* used all the way up to 999,999. This allows us to finish out the rules as follows:</p>
*
* <pre>1,000,000: &lt;&lt; million[ &gt;&gt;];
* <pre>
* 1,000,000: &lt;&lt; million[ &gt;&gt;];
* 1,000,000,000: &lt;&lt; billion[ &gt;&gt;];
* 1,000,000,000,000: &lt;&lt; trillion[ &gt;&gt;];
* 1,000,000,000,000,000: OUT OF RANGE!;</pre>
@ -121,34 +126,28 @@ import com.ibm.icu.util.UResourceBundleIterator;
* <p>To see how these rules actually work in practice, consider the following example:
* Formatting 25,340 with this rule set would work like this:</p>
*
* <table border="0" width="630">
* <table style="border-collapse: collapse;">
* <tr>
* <td style="width: 21;"></td>
* <td style="width: 257; vertical-align: top;"><strong>&lt;&lt; thousand &gt;&gt;</strong></td>
* <td style="width: 340; vertical-align: top;">[the rule whose base value is 1,000 is applicable to 25,340]</td>
* </tr>
* <tr>
* <td style="width: 21;"></td>
* <td style="width: 257; vertical-align: top;"><strong>twenty-&gt;&gt;</strong> thousand &gt;&gt;</td>
* <td style="width: 340; vertical-align: top;">[25,340 over 1,000 is 25. The rule for 20 applies.]</td>
* </tr>
* <tr>
* <td style="width: 21;"></td>
* <td style="width: 257; vertical-align: top;">twenty-<strong>five</strong> thousand &gt;&gt;</td>
* <td style="width: 340; vertical-align: top;">[25 mod 10 is 5. The rule for 5 is &quot;five.&quot;]</td>
* </tr>
* <tr>
* <td style="width: 21;"></td>
* <td style="width: 257; vertical-align: top;">twenty-five thousand <strong>&lt;&lt; hundred &gt;&gt;</strong></td>
* <td style="width: 340; vertical-align: top;">[25,340 mod 1,000 is 340. The rule for 100 applies.]</td>
* </tr>
* <tr>
* <td style="width: 21;"></td>
* <td style="width: 257; vertical-align: top;">twenty-five thousand <strong>three</strong> hundred &gt;&gt;</td>
* <td style="width: 340; vertical-align: top;">[340 over 100 is 3. The rule for 3 is &quot;three.&quot;]</td>
* </tr>
* <tr>
* <td style="width: 21;"></td>
* <td style="width: 257; vertical-align: top;">twenty-five thousand three hundred <strong>forty</strong></td>
* <td style="width: 340; vertical-align: top;">[340 mod 100 is 40. The rule for 40 applies. Since 40 divides
* evenly by 10, the hyphen and substitution in the brackets are omitted.]</td>
@ -187,20 +186,20 @@ import com.ibm.icu.util.UResourceBundleIterator;
*
* <hr>
*
* <p>The description of a <tt>RuleBasedNumberFormat</tt>'s behavior consists of one or more <em>rule
* <p>The description of a <code>RuleBasedNumberFormat</code>'s behavior consists of one or more <em>rule
* sets.</em> Each rule set consists of a name, a colon, and a list of <em>rules.</em> A rule
* set name must begin with a % sign. Rule sets with names that begin with a single % sign
* are <em>public:</em> the caller can specify that they be used to format and parse numbers.
* Rule sets with names that begin with %% are <em>private:</em> they exist only for the use
* of other rule sets. If a formatter only has one rule set, the name may be omitted.</p>
*
* <p>The user can also specify a special &quot;rule set&quot; named <tt>%%lenient-parse</tt>.
* The body of <tt>%%lenient-parse</tt> isn't a set of number-formatting rules, but a <tt>RuleBasedCollator</tt>
* <p>The user can also specify a special &quot;rule set&quot; named <code>%%lenient-parse</code>.
* The body of <code>%%lenient-parse</code> isn't a set of number-formatting rules, but a <code>RuleBasedCollator</code>
* description which is used to define equivalences for lenient parsing. For more information
* on the syntax, see <tt>RuleBasedCollator</tt>. For more information on lenient parsing,
* see <tt>setLenientParse()</tt>. <em>Note:</em> symbols that have syntactic meaning
* on the syntax, see <code>RuleBasedCollator</code>. For more information on lenient parsing,
* see <code>setLenientParse()</code>. <em>Note:</em> symbols that have syntactic meaning
* in collation rules, such as '&amp;', have no particular meaning when appearing outside
* of the <tt>lenient-parse</tt> rule set.</p>
* of the <code>lenient-parse</code> rule set.</p>
*
* <p>The body of a rule set consists of an ordered, semicolon-delimited list of <em>rules.</em>
* Internally, every rule has a base value, a divisor, rule text, and zero, one, or two <em>substitutions.</em>
@ -210,48 +209,46 @@ import com.ibm.icu.util.UResourceBundleIterator;
* <p>A rule descriptor can take one of the following forms (text in <em>italics</em> is the
* name of a token):</p>
*
* <table border="0" width="100%">
* <table style="border-collapse: collapse;">
* <tr>
* <td style="width: 5%; vertical-align: top;"></td>
* <td style="width: 8%; vertical-align: top;"><em>bv</em>:</td>
* <td valign="top"><em>bv</em> specifies the rule's base value. <em>bv</em> is a decimal
* <th style="padding-left: 1em; padding-right: 1em;">Descriptor</th>
* <th>Description</th>
* </tr>
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;"><em>bv</em>:</td>
* <td style="vertical-align: top;"><em>bv</em> specifies the rule's base value. <em>bv</em> is a decimal
* number expressed using ASCII digits. <em>bv</em> may contain spaces, periods, and commas,
* which are ignored. The rule's divisor is the highest power of 10 less than or equal to
* the base value.</td>
* </tr>
* <tr>
* <td style="width: 5%; vertical-align: top;"></td>
* <td style="width: 8%; vertical-align: top;"><em>bv</em>/<em>rad</em>:</td>
* <td valign="top"><em>bv</em> specifies the rule's base value. The rule's divisor is the
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;"><em>bv</em>/<em>rad</em>:</td>
* <td style="vertical-align: top;"><em>bv</em> specifies the rule's base value. The rule's divisor is the
* highest power of <em>rad</em> less than or equal to the base value.</td>
* </tr>
* <tr>
* <td style="width: 5%; vertical-align: top;"></td>
* <td style="width: 8%; vertical-align: top;"><em>bv</em>&gt;:</td>
* <td valign="top"><em>bv</em> specifies the rule's base value. To calculate the divisor,
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;"><em>bv</em>&gt;:</td>
* <td style="vertical-align: top;"><em>bv</em> specifies the rule's base value. To calculate the divisor,
* let the radix be 10, and the exponent be the highest exponent of the radix that yields a
* result less than or equal to the base value. Every &gt; character after the base value
* decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
* raised to the power of the exponent; otherwise, the divisor is 1.</td>
* </tr>
* <tr>
* <td style="width: 5%; vertical-align: top;"></td>
* <td style="width: 8%; vertical-align: top;"><em>bv</em>/<em>rad</em>&gt;:</td>
* <td valign="top"><em>bv</em> specifies the rule's base value. To calculate the divisor,
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;"><em>bv</em>/<em>rad</em>&gt;:</td>
* <td style="vertical-align: top;"><em>bv</em> specifies the rule's base value. To calculate the divisor,
* let the radix be <em>rad</em>, and the exponent be the highest exponent of the radix that
* yields a result less than or equal to the base value. Every &gt; character after the radix
* decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
* raised to the power of the exponent; otherwise, the divisor is 1.</td>
* </tr>
* <tr>
* <td style="width: 5%; vertical-align: top;"></td>
* <td style="width: 8%; vertical-align: top;">-x:</td>
* <td valign="top">The rule is a negative-number rule.</td>
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;">-x:</td>
* <td style="vertical-align: top;">The rule is a negative-number rule.</td>
* </tr>
* <tr>
* <td style="width: 5%; vertical-align: top;"></td>
* <td style="width: 8%; vertical-align: top;">x.x:</td>
* <td valign="top">The rule is an <em>improper fraction rule</em>. If the full stop in
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;">x.x:</td>
* <td style="vertical-align: top;">The rule is an <em>improper fraction rule</em>. If the full stop in
* the middle of the rule name is replaced with the decimal point
* that is used in the language or DecimalFormatSymbols, then that rule will
* have precedence when formatting and parsing this rule. For example, some
@ -260,10 +257,9 @@ import com.ibm.icu.util.UResourceBundleIterator;
* handle the decimal point that matches the language's natural spelling of
* the punctuation of either the full stop or comma.</td>
* </tr>
* <tr>
* <td style="width: 5%; vertical-align: top;"></td>
* <td style="width: 8%; vertical-align: top;">0.x:</td>
* <td valign="top">The rule is a <em>proper fraction rule</em>. If the full stop in
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;">0.x:</td>
* <td style="vertical-align: top;">The rule is a <em>proper fraction rule</em>. If the full stop in
* the middle of the rule name is replaced with the decimal point
* that is used in the language or DecimalFormatSymbols, then that rule will
* have precedence when formatting and parsing this rule. For example, some
@ -272,10 +268,9 @@ import com.ibm.icu.util.UResourceBundleIterator;
* handle the decimal point that matches the language's natural spelling of
* the punctuation of either the full stop or comma.</td>
* </tr>
* <tr>
* <td style="width: 5%; vertical-align: top;"></td>
* <td style="width: 8%; vertical-align: top;">x.0:</td>
* <td valign="top">The rule is a <em>default rule</em>. If the full stop in
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;">x.0:</td>
* <td style="vertical-align: top;">The rule is a <em>default rule</em>. If the full stop in
* the middle of the rule name is replaced with the decimal point
* that is used in the language or DecimalFormatSymbols, then that rule will
* have precedence when formatting and parsing this rule. For example, some
@ -284,19 +279,16 @@ import com.ibm.icu.util.UResourceBundleIterator;
* handle the decimal point that matches the language's natural spelling of
* the punctuation of either the full stop or comma.</td>
* </tr>
* <tr>
* <td style="width: 5%; vertical-align: top;"></td>
* <td style="width: 8%; vertical-align: top;">Inf:</td>
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;">Inf:</td>
* <td style="vertical-align: top;">The rule for infinity.</td>
* </tr>
* <tr>
* <td style="width: 5%; vertical-align: top;"></td>
* <td style="width: 8%; vertical-align: top;">NaN:</td>
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;">NaN:</td>
* <td style="vertical-align: top;">The rule for an IEEE 754 NaN (not a number).</td>
* </tr>
* <tr>
* <td style="width: 5%; vertical-align: top;"></td>
* <td style="width: 8%; vertical-align: top;"><em>nothing</em></td>
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;"><em>nothing</em></td>
* <td style="vertical-align: top;">If the rule's rule descriptor is left out, the base value is one plus the
* preceding rule's base value (or zero if this is the first rule in the list) in a normal
* rule set.&nbsp; In a fraction rule set, the base value is the same as the preceding rule's
@ -313,8 +305,8 @@ import com.ibm.icu.util.UResourceBundleIterator;
* algorithms: If the rule set is a regular rule set, do the following:
*
* <ul>
* <li>If the rule set includes a default rule (and the number was passed in as a <tt>double</tt>),
* use the default rule.&nbsp; (If the number being formatted was passed in as a <tt>long</tt>,
* <li>If the rule set includes a default rule (and the number was passed in as a <code>double</code>),
* use the default rule.&nbsp; (If the number being formatted was passed in as a <code>long</code>,
* the default rule is ignored.)</li>
* <li>If the number is negative, use the negative-number rule.</li>
* <li>If the number has a fractional part and is greater than 1, use the improper fraction
@ -361,49 +353,43 @@ import com.ibm.icu.util.UResourceBundleIterator;
*
* <p>The meanings of the substitution token characters are as follows:</p>
*
* <table border="0" width="100%">
* <table style="border-collapse: collapse;">
* <tr>
* <td style="width: 37;"></td>
* <td style="width: 23;">&gt;&gt;</td>
* <td style="width: 165; vertical-align: top;">in normal rule</td>
* <th>Syntax</th>
* <th>Usage</th>
* <th>Description</th>
* </tr>
* <tr style="border-top: 1px solid black;">
* <td style="white-space: nowrap;" rowspan="4">&gt;&gt;</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in normal rule</td>
* <td>Divide the number by the rule's divisor and format the remainder</td>
* </tr>
* <tr>
* <td style="width: 37;"></td>
* <td style="width: 23;"></td>
* <td style="width: 165; vertical-align: top;">in negative-number rule</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in negative-number rule</td>
* <td>Find the absolute value of the number and format the result</td>
* </tr>
* <tr>
* <td style="width: 37;"></td>
* <td style="width: 23;"></td>
* <td style="width: 165; vertical-align: top;">in fraction or default rule</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in fraction or default rule</td>
* <td>Isolate the number's fractional part and format it.</td>
* </tr>
* <tr>
* <td style="width: 37;"></td>
* <td style="width: 23;"></td>
* <td style="width: 165; vertical-align: top;">in rule in fraction rule set</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in rule in fraction rule set</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td style="width: 37;"></td>
* <td style="width: 23;">&gt;&gt;&gt;</td>
* <td style="width: 165; vertical-align: top;">in normal rule</td>
* <tr style="border-top: 1px solid black;">
* <td style="white-space: nowrap;" rowspan="2">&gt;&gt;&gt;</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in normal rule</td>
* <td>Divide the number by the rule's divisor and format the remainder,
* but bypass the normal rule-selection process and just use the
* rule that precedes this one in this rule list.</td>
* </tr>
* <tr>
* <td style="width: 37;"></td>
* <td style="width: 23;"></td>
* <td style="width: 165; vertical-align: top;">in all other rules</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in all other rules</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td style="width: 37;"></td>
* <td style="width: 23;">&lt;&lt;</td>
* <td style="width: 165; vertical-align: top;">in normal rule</td>
* <tr style="border-top: 1px solid black;">
* <td style="white-space: nowrap;" rowspan="4">&lt;&lt;</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in normal rule</td>
* <td>Divide the number by the rule's divisor, perform floor() on the quotient,
* and format the resulting value.<br>
* If there is a DecimalFormat pattern between the &lt; characters and the
@ -416,83 +402,93 @@ import com.ibm.icu.util.UResourceBundleIterator;
* </td>
* </tr>
* <tr>
* <td style="width: 37;"></td>
* <td style="width: 23;"></td>
* <td style="width: 165; vertical-align: top;">in negative-number rule</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in negative-number rule</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td style="width: 37;"></td>
* <td style="width: 23;"></td>
* <td style="width: 165; vertical-align: top;">in fraction or default rule</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in fraction or default rule</td>
* <td>Isolate the number's integral part and format it.</td>
* </tr>
* <tr>
* <td style="width: 37;"></td>
* <td style="width: 23;"></td>
* <td style="width: 165; vertical-align: top;">in rule in fraction rule set</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in rule in fraction rule set</td>
* <td>Multiply the number by the rule's base value and format the result.</td>
* </tr>
* <tr>
* <td style="width: 37;"></td>
* <td style="width: 23;">==</td>
* <td style="width: 165; vertical-align: top;">in all rule sets</td>
* <tr style="border-top: 1px solid black;">
* <td style="white-space: nowrap;">==</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in all rule sets</td>
* <td>Format the number unchanged</td>
* </tr>
* <tr>
* <td style="width: 37;"></td>
* <td style="width: 23;">[]</td>
* <td style="width: 165; vertical-align: top;">in normal rule</td>
* <td>Omit the optional text if the number is an even multiple of the rule's divisor</td>
* <tr style="border-top: 1px solid black;">
* <td style="white-space: nowrap;" rowspan="6">[]<br/>[|]</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in normal rule</td>
* <td>
* <ul>
* <li>When the number is not an even multiple of the rule's divisor, use the text and rules between the beginning square bracket,
* and the end square bracket or the | symbol.</li>
* <li>When the number is an even multiple of the rule's divisor, and no | symbol is used, omit the text.</li>
* <li>When the number is an even multiple of the rule's divisor, and the | symbol is used, use the text and rules between the | symbol
* and the end square bracket.</li>
* </ul>
* </td>
* </tr>
* <tr>
* <td style="width: 37;"></td>
* <td style="width: 23;"></td>
* <td style="width: 165; vertical-align: top;">in negative-number rule</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in improper-fraction rule</td>
* <td>This syntax is the same as specifying both an x.x rule and a 0.x rule.
* <ul>
* <li>When the number is not between 0 and 1, use the text and rules between the beginning square bracket,
* and the end square bracket or the | symbol.</li>
* <li>When the number is between 0 and 1, and no | symbol is used, omit the text.</li>
* <li>When the number is between 0 and 1, and the | symbol is used, use the text and rules between the | symbol
* and the end square bracket.</li>
* </ul>
* </td>
* </tr>
* <tr>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in default rule</td>
* <td>This syntax is the same as specifying both an x.x rule and an x.0 rule.
* <ul>
* <li>When the number is not an integer, use the text and rules between the beginning square bracket,
* and the end square bracket or the | symbol.</li>
* <li>When the number is an integer, and no | symbol is used, omit the text.</li>
* <li>When the number is an integer, and the | symbol is used, use the text and rules between the | symbol
* and the end square bracket.</li>
* </ul>
* </td>
* </tr>
* <tr>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in rule in fraction rule set</td>
* <td>
* <ul>
* <li>When multiplying the number by the rule's base value does not yield 1, use the text and rules between the beginning square bracket,
* and the end square bracket or the | symbol.</li>
* <li>When multiplying the number by the rule's base value yields 1, and no | symbol is used, omit the text.</li>
* <li>When multiplying the number by the rule's base value yields 1, and the | symbol is used, use the text and rules between the | symbol
* and the end square bracket.</li>
* </ul>
* </td>
* </tr>
* <tr>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in proper-fraction rule</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td style="width: 37;"></td>
* <td style="width: 23;"></td>
* <td style="width: 165; vertical-align: top;">in improper-fraction rule</td>
* <td>Omit the optional text if the number is between 0 and 1 (same as specifying both an
* x.x rule and a 0.x rule)</td>
* </tr>
* <tr>
* <td style="width: 37;"></td>
* <td style="width: 23;"></td>
* <td style="width: 165; vertical-align: top;">in default rule</td>
* <td>Omit the optional text if the number is an integer (same as specifying both an x.x
* rule and an x.0 rule)</td>
* </tr>
* <tr>
* <td style="width: 37;"></td>
* <td style="width: 23;"></td>
* <td style="width: 165; vertical-align: top;">in proper-fraction rule</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in negative-number rule</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td style="width: 37;"></td>
* <td style="width: 23;"></td>
* <td style="width: 165; vertical-align: top;">in rule in fraction rule set</td>
* <td>Omit the optional text if multiplying the number by the rule's base value yields 1.</td>
* </tr>
* <tr>
* <td style="width: 37;">$(cardinal,<i>plural syntax</i>)$</td>
* <td style="width: 23;"></td>
* <td style="width: 165; vertical-align: top;">in all rule sets</td>
* <tr style="border-top: 1px solid black;">
* <td style="white-space: nowrap;">$(cardinal,<i>plural syntax</i>)$</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in all rule sets</td>
* <td>This provides the ability to choose a word based on the number divided by the radix to the power of the
* exponent of the base value for the specified locale, which is normally equivalent to the &lt;&lt; value.
* This uses the cardinal plural rules from PluralFormat. All strings used in the plural format are treated
* This uses the cardinal plural rules from {@link PluralFormat}. All strings used in the plural format are treated
* as the same base value for parsing.</td>
* </tr>
* <tr>
* <td style="width: 37;">$(ordinal,<i>plural syntax</i>)$</td>
* <td style="width: 23;"></td>
* <td style="width: 165; vertical-align: top;">in all rule sets</td>
* <tr style="border-top: 1px solid black;">
* <td style="white-space: nowrap;">$(ordinal,<i>plural syntax</i>)$</td>
* <td style="white-space: nowrap; vertical-align: top; padding-left: 1em; padding-right: 1em;">in all rule sets</td>
* <td>This provides the ability to choose a word based on the number divided by the radix to the power of the
* exponent of the base value for the specified locale, which is normally equivalent to the &lt;&lt; value.
* This uses the ordinal plural rules from PluralFormat. All strings used in the plural format are treated
* This uses the ordinal plural rules from {@link PluralFormat}. All strings used in the plural format are treated
* as the same base value for parsing.</td>
* </tr>
* </table>
@ -500,22 +496,23 @@ import com.ibm.icu.util.UResourceBundleIterator;
* <p>The substitution descriptor (i.e., the text between the token characters) may take one
* of three forms:</p>
*
* <table border="0" width="100%">
* <table style="border-collapse: collapse;">
* <tr>
* <td style="width: 42;"></td>
* <td style="width: 166; vertical-align: top;">a rule set name</td>
* <th>Descriptor</th>
* <th>Description</th>
* </tr>
* <tr>
* <td style="vertical-align: top;">a rule set name</td>
* <td>Perform the mathematical operation on the number, and format the result using the
* named rule set.</td>
* </tr>
* <tr>
* <td style="width: 42;"></td>
* <td style="width: 166; vertical-align: top;">a DecimalFormat pattern</td>
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;">a DecimalFormat pattern</td>
* <td>Perform the mathematical operation on the number, and format the result using a
* DecimalFormat with the specified pattern.&nbsp; The pattern must begin with 0 or #.</td>
* </tr>
* <tr>
* <td style="width: 42;"></td>
* <td style="width: 166; vertical-align: top;">nothing</td>
* <tr style="border-top: 1px solid black;">
* <td style="vertical-align: top;">nothing</td>
* <td>Perform the mathematical operation on the number, and format the result using the rule
* set containing the current rule, except:<ul>
* <li>You can't have an empty substitution descriptor with a == substitution.</li>
@ -553,7 +550,7 @@ public class RuleBasedNumberFormat extends NumberFormat {
//-----------------------------------------------------------------------
// Generated by serialver from JDK 1.4.1_01
static final long serialVersionUID = -7664252765575395068L;
private static final long serialVersionUID = -7664252765575395068L;
/**
* Selector code that tells the constructor to create a spellout formatter
@ -1164,7 +1161,7 @@ public class RuleBasedNumberFormat extends NumberFormat {
/**
* Formats the specified number according to the specified rule set.
* (If the specified rule set specifies a default ["x.0"] rule, this function
* ignores it. Convert the number to a double first if you ned it.) This
* ignores it. Convert the number to a double first if you need it.) This
* function preserves all the precision in the long-- it doesn't convert it
* to a double.
* @param number The number to format.
@ -1210,7 +1207,7 @@ public class RuleBasedNumberFormat extends NumberFormat {
* Formats the specified number using the formatter's default rule set.
* (The default rule set is the last public rule set defined in the description.)
* (If the specified rule set specifies a default ["x.0"] rule, this function
* ignores it. Convert the number to a double first if you ned it.) This
* ignores it. Convert the number to a double first if you need it.) This
* function preserves all the precision in the long-- it doesn't convert it
* to a double.
* @param number The number to format.
@ -1305,7 +1302,7 @@ public class RuleBasedNumberFormat extends NumberFormat {
public Number parse(String text, ParsePosition parsePosition) {
// parsePosition tells us where to start parsing. We copy the
// text in the string from here to the end inro a new string,
// text in the string from here to the end into a new string,
// and create a new ParsePosition and result variable to use
// for the duration of the parse operation
String workingText = text.substring(parsePosition.getIndex());
@ -1334,10 +1331,9 @@ public class RuleBasedNumberFormat extends NumberFormat {
result = tempResult;
highWaterMark.setIndex(workingPos.getIndex());
}
// commented out because this API on ParsePosition doesn't exist in 1.1.x
// if (workingPos.getErrorIndex() > highWaterMark.getErrorIndex()) {
// highWaterMark.setErrorIndex(workingPos.getErrorIndex());
// }
if (workingPos.getErrorIndex() > highWaterMark.getErrorIndex()) {
highWaterMark.setErrorIndex(workingPos.getErrorIndex());
}
// if we manage to use up all the characters in the string,
// we don't have to try any more rule sets
@ -1350,13 +1346,12 @@ public class RuleBasedNumberFormat extends NumberFormat {
workingPos.setIndex(0);
}
// add the high water mark to our original parse position and
// add the high watermark to our original parse position and
// return the result
parsePosition.setIndex(parsePosition.getIndex() + highWaterMark.getIndex());
// commented out because this API on ParsePosition doesn't exist in 1.1.x
// if (highWaterMark.getIndex() == 0) {
// parsePosition.setErrorIndex(parsePosition.getIndex() + highWaterMark.getErrorIndex());
// }
if (highWaterMark.getIndex() == 0) {
parsePosition.setErrorIndex(parsePosition.getIndex() + highWaterMark.getErrorIndex());
}
return result;
}
@ -1668,7 +1663,7 @@ public class RuleBasedNumberFormat extends NumberFormat {
* @param specialName the name of the special rule text to extract
* @return the special rule text, or null if the rule was not found
*/
private String extractSpecial(StringBuilder description, String specialName) {
private static String extractSpecial(StringBuilder description, String specialName) {
String result = null;
int lp = description.indexOf(specialName);
if (lp != -1) {
@ -1701,7 +1696,7 @@ public class RuleBasedNumberFormat extends NumberFormat {
}
/**
* This function parses the description and uses it to build all of
* This function parses the description and uses it to build all of the
* internal data structures that the formatter uses to do formatting
* @param description The description of the formatter's desired behavior.
* This is either passed in by the caller or loaded out of a resource
@ -1747,7 +1742,7 @@ public class RuleBasedNumberFormat extends NumberFormat {
// divide up the descriptions into individual rule-set descriptions
// and store them in a temporary array. At each step, we also
// new up a rule set, but all this does is initialize its name
// create a rule set, but all this does is initialize its name
// and remove it from its description. We can't actually parse
// the rest of the descriptions and finish initializing everything
// because we have to know the names and locations of all the rule
@ -1806,8 +1801,8 @@ public class RuleBasedNumberFormat extends NumberFormat {
defaultRuleSet = ruleSets[ruleSets.length - 1];
}
// finally, we can go back through the temporary descriptions
// list and finish setting up the substructure
// Now that we know all the rule names, we can go back through
// the temporary descriptions list and finish setting up the substructure
for (int i = 0; i < ruleSets.length; i++) {
ruleSets[i].parseRules(ruleSetDescriptions[i]);
}
@ -1902,40 +1897,34 @@ public class RuleBasedNumberFormat extends NumberFormat {
// iterate through the characters...
int start = 0;
char ch;
while (start < descriptionLength) {
// seek to the first non-whitespace character...
// Seek to the first non-whitespace character...
// If the first non-whitespace character is semicolon, skip it and continue
while (start < descriptionLength
&& PatternProps.isWhiteSpace(description.charAt(start)))
&& (PatternProps.isWhiteSpace(ch = description.charAt(start)) || ch == ';'))
{
++start;
}
//if the first non-whitespace character is semicolon, skip it and continue
if (start < descriptionLength && description.charAt(start) == ';') {
start += 1;
continue;
}
// locate the next semicolon in the text and copy the text from
// our current position up to that semicolon into the result
int p = description.indexOf(';', start);
if (p == -1) {
// or if we don't find a semicolon, just copy the rest of
// the string into the result
result.append(description.substring(start));
result.append(description, start, descriptionLength);
break;
}
else if (p < descriptionLength) {
result.append(description.substring(start, p + 1));
start = p + 1;
}
else {
// when we get here, we've seeked off the end of the string, and
// we terminate the loop (we continue until *start* is -1 rather
// than until *p* is -1, because otherwise we'd miss the last
// rule in the description)
break;
int end = p + 1;
result.append(description, start, end);
start = end;
}
// when we get here from the else, we've seeked off the end of the string, and
// we terminate the loop (we continue until *start* is -1 rather
// than until *p* is -1, because otherwise we'd miss the last
// rule in the description)
}
return result;
}

View file

@ -11,7 +11,7 @@
<title>ICU4J com.ibm.icu.text Package Overview</title>
</head>
<body bgcolor="white">
Extensions and enhancements to java.text to support unicode transforms, UnicodeSet, surrogate char utilities, UCA collation, normalization, break iteration (rule and dictionary based), enhanced number format, international string searching, and arabic shaping.</p>
<p>Extensions and enhancements to java.text to support Unicode transforms, UnicodeSet, surrogate char utilities, UCA collation, normalization, break iteration (rule and dictionary based), enhanced number format, international string searching, and Arabic shaping.</p>
<ul>
<li>Unicode Transforms (Transliteration) convert between different representations of Unicode text.</li>