ICU-11276 Initial NumberRangeFormatter implementation. Needs data loading and implementation of new methods on Modifier.

2025-04-07 06:25:30 +00:00 · 2018-09-05 17:42:41 -07:00 · 2018-09-05 17:42:41 -07:00 · 238271f27b
commit 238271f27b
parent c5e458ba8d
12 changed files with 297 additions and 8 deletions
--- a/icu4c/source/i18n/i18n.vcxproj
+++ b/icu4c/source/i18n/i18n.vcxproj
@ -554,7 +554,7 @@
    <ClInclude Include="numparse_validators.h" />
    <ClInclude Include="numparse_types.h" />
    <ClInclude Include="numparse_utils.h" />
-    <ClInclude Include="numrange_types.h" />
+    <ClInclude Include="numrange_impl.h" />
  </ItemGroup>
  <ItemGroup>
    <ResourceCompile Include="i18n.rc" />
--- a/icu4c/source/i18n/i18n.vcxproj.filters
+++ b/icu4c/source/i18n/i18n.vcxproj.filters
@ -920,7 +920,7 @@
    <ClInclude Include="numparse_utils.h">
      <Filter>formatting</Filter>
    </ClInclude>
-    <ClInclude Include="numrange_types.h">
+    <ClInclude Include="numrange_impl.h">
      <Filter>formatting</Filter>
    </ClInclude>
    <ClInclude Include="olsontz.h">
--- a/icu4c/source/i18n/i18n_uwp.vcxproj
+++ b/icu4c/source/i18n/i18n_uwp.vcxproj
@ -659,7 +659,7 @@
    <ClInclude Include="numparse_validators.h" />
    <ClInclude Include="numparse_types.h" />
    <ClInclude Include="numparse_utils.h" />
-    <ClInclude Include="numrange_types.h" />
+    <ClInclude Include="numrange_impl.h" />
  </ItemGroup>
  <ItemGroup>
    <ResourceCompile Include="i18n.rc" />
--- a/icu4c/source/i18n/number_modifiers.h
+++ b/icu4c/source/i18n/number_modifiers.h
@ -37,6 +37,10 @@ class U_I18N_API ConstantAffixModifier : public Modifier, public UObject {

    bool isStrong() const U_OVERRIDE;

+    bool containsField(UNumberFormatFields field) const U_OVERRIDE;
+
+    bool operator==(const Modifier& other) const U_OVERRIDE;
+
  private:
    UnicodeString fPrefix;
    UnicodeString fSuffix;
@ -64,6 +68,10 @@ class U_I18N_API SimpleModifier : public Modifier, public UMemory {

    bool isStrong() const U_OVERRIDE;

+    bool containsField(UNumberFormatFields field) const U_OVERRIDE;
+
+    bool operator==(const Modifier& other) const U_OVERRIDE;
+
    /**
     * TODO: This belongs in SimpleFormatterImpl. The only reason I haven't moved it there yet is because
     * DoubleSidedStringBuilder is an internal class and SimpleFormatterImpl feels like it should not depend on it.
@ -122,6 +130,10 @@ class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory {

    bool isStrong() const U_OVERRIDE;

+    bool containsField(UNumberFormatFields field) const U_OVERRIDE;
+
+    bool operator==(const Modifier& other) const U_OVERRIDE;
+
  protected:
    // NOTE: In Java, these are stored as array pointers. In C++, the NumberStringBuilder is stored by
    // value and is treated internally as immutable.
@ -206,6 +218,16 @@ class U_I18N_API EmptyModifier : public Modifier, public UMemory {
        return fStrong;
    }

+    /**
+     * Always reports false, whichever field is asked about.
+     */
+    bool containsField(UNumberFormatFields field) const U_OVERRIDE {
+        static_cast<void>(field);  // parameter is intentionally unused
+        return false;
+    }
+
+    /**
+     * Reports equality with any modifier whose code point count is zero.
+     */
+    bool operator==(const Modifier& other) const U_OVERRIDE {
+        // The error code is local to this call and is not inspected afterwards.
+        UErrorCode localStatus = U_ZERO_ERROR;
+        const bool otherIsEmpty = (other.getCodePointCount(localStatus) == 0);
+        return otherIsEmpty;
+    }
+
  private:
    bool fStrong;
 };
--- a/icu4c/source/i18n/number_patternmodifier.h
+++ b/icu4c/source/i18n/number_patternmodifier.h
@ -184,6 +184,10 @@ class U_I18N_API MutablePatternModifier

    bool isStrong() const U_OVERRIDE;

+    bool containsField(UNumberFormatFields field) const U_OVERRIDE;
+
+    bool operator==(const Modifier& other) const U_OVERRIDE;
+
    /**
     * Returns the string that substitutes a given symbol type in a pattern.
     */
--- a/icu4c/source/i18n/number_scientific.h
+++ b/icu4c/source/i18n/number_scientific.h
@ -30,6 +30,10 @@ class U_I18N_API ScientificModifier : public UMemory, public Modifier {

    bool isStrong() const U_OVERRIDE;

+    bool containsField(UNumberFormatFields field) const U_OVERRIDE;
+
+    bool operator==(const Modifier& other) const U_OVERRIDE;
+
  private:
    int32_t fExponent;
    const ScientificHandler *fHandler;
--- a/icu4c/source/i18n/number_types.h
+++ b/icu4c/source/i18n/number_types.h
@ -127,6 +127,7 @@ class U_I18N_API AffixPatternProvider {
    virtual bool hasBody() const = 0;
 };

+
 /**
 * A Modifier is an object that can be passed through the formatting pipeline until it is finally applied to the string
 * builder. A Modifier usually contains a prefix and a suffix that are applied, but it could contain something else,
@ -177,6 +178,16 @@ class U_I18N_API Modifier {
     * @return Whether the modifier is strong.
     */
    virtual bool isStrong() const = 0;
+
+    /**
+     * Whether the modifier contains at least one occurrence of the given field.
+     */
+    virtual bool containsField(UNumberFormatFields field) const = 0;
+
+    /**
+     * Returns whether the affixes owned by this modifier are equal to the ones owned by the given modifier.
+     */
+    virtual bool operator==(const Modifier& other) const = 0;
 };

 /**
--- a/icu4c/source/i18n/numrange_fluent.cpp
+++ b/icu4c/source/i18n/numrange_fluent.cpp
@ -9,7 +9,7 @@
 // Helpful in toString methods and elsewhere.
 #define UNISTR_FROM_STRING_EXPLICIT

-#include "numrange_types.h"
+#include "numrange_impl.h"
 #include "util.h"
 #include "number_utypes.h"

@ -241,7 +241,9 @@ FormattedNumberRange LocalizedNumberRangeFormatter::formatFormattableRange(

 void LocalizedNumberRangeFormatter::formatImpl(
        UFormattedNumberRangeData* results, UErrorCode& status) const {
-    // TODO: This is a placeholder implementation.
+
+    MicroProps microsFirst;
+    MicroProps microsSecond;

    UFormattedNumberData r1;
    r1.quantity = results->quantity1;
--- a/icu4c/source/i18n/numrange_impl.cpp
+++ b/icu4c/source/i18n/numrange_impl.cpp
@ -9,14 +9,220 @@
 // Helpful in toString methods and elsewhere.
 #define UNISTR_FROM_STRING_EXPLICIT

-#include "numrange_types.h"
+#include "numrange_impl.h"

 using namespace icu;
 using namespace icu::number;
 using namespace icu::number::impl;

+namespace {
+
+/**
+ * Helper for the 2-dimensional switch statement in NumberRangeFormatterImpl::format().
+ *
+ * Packs an identity-fallback setting and an identity result into one value so that both can be
+ * dispatched on together: the fallback occupies the low four bits and the result is shifted into
+ * the bits above them.
+ *
+ * The first parameter uses UNumberRangeIdentityFallback, the same type as the fIdentityFallback
+ * member that is passed in, because one enum type does not implicitly convert to another.
+ */
+constexpr int8_t identity2d(UNumberRangeIdentityFallback a, UNumberRangeIdentityResult b) {
+    // The bitwise-or is computed as int; narrow back to int8_t explicitly.
+    return static_cast<int8_t>(static_cast<int8_t>(a) | (static_cast<int8_t>(b) << 4));
+}
+
+} // namespace
+
+/**
+ * Entry point for range formatting: pre-processes both quantities, records how they compare in
+ * data.identityResult, and dispatches to formatRange, formatApproximately, or formatSingleValue
+ * according to fIdentityFallback.
+ *
+ * @param data Holds the two quantities; receives the identity result and the formatted output.
+ * @param equalBeforeRounding Caller-supplied flag; when true the identity result is
+ *     UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING and the quantities are not compared.
+ * @param status Nothing is formatted if this is a failure on entry or after pre-processing.
+ */
+void NumberRangeFormatterImpl::format(UFormattedNumberRangeData& data, bool equalBeforeRounding, UErrorCode& status) const {
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    // Each quantity goes through its own formatter before the two are compared.
+    // NOTE(review): preProcess presumably applies rounding to the quantity -- confirm.
+    MicroProps firstMicros;
+    MicroProps secondMicros;
+    formatterImpl1.preProcess(data.quantity1, firstMicros, status);
+    formatterImpl2.preProcess(data.quantity2, secondMicros, status);
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    // Classify the pair; the quantities are only compared when the caller did not already
+    // declare them equal.
+    UNumberRangeIdentityResult identity = UNUM_IDENTITY_RESULT_NOT_EQUAL;
+    if (equalBeforeRounding) {
+        identity = UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING;
+    } else if (data.quantity1 == data.quantity2) {
+        identity = UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING;
+    }
+    data.identityResult = identity;
+
+    // Dispatch on the (fallback, identity) pair.
+    switch (identity2d(fIdentityFallback, data.identityResult)) {
+        // Full range: the RANGE fallback always, and every fallback when the numbers differ.
+        case identity2d(UNUM_IDENTITY_FALLBACK_RANGE, UNUM_IDENTITY_RESULT_NOT_EQUAL):
+        case identity2d(UNUM_IDENTITY_FALLBACK_RANGE, UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
+        case identity2d(UNUM_IDENTITY_FALLBACK_RANGE, UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
+        case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY, UNUM_IDENTITY_RESULT_NOT_EQUAL):
+        case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE, UNUM_IDENTITY_RESULT_NOT_EQUAL):
+        case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE, UNUM_IDENTITY_RESULT_NOT_EQUAL):
+            formatRange(data, firstMicros, secondMicros, status);
+            break;
+
+        // Approximation.
+        case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY, UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
+        case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY, UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
+        case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE, UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
+            formatApproximately(data, firstMicros, secondMicros, status);
+            break;
+
+        // Single value.
+        case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE, UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
+        case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE, UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
+        case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE, UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
+            formatSingleValue(data, firstMicros, secondMicros, status);
+            break;
+
+        default:
+            // Every combination is listed above.
+            U_ASSERT(false);
+            break;
+    }
+}


+/**
+ * Produces the output for the "single value" case.
+ *
+ * When fSameFormatters is false, this falls back to formatRange. Otherwise only the first
+ * quantity is formatted, using formatterImpl1, and written to data.string.
+ */
+void NumberRangeFormatterImpl::formatSingleValue(UFormattedNumberRangeData& data,
+                                                 MicroProps& micros1, MicroProps& micros2,
+                                                 UErrorCode& status) const {
+    if (!fSameFormatters) {
+        formatRange(data, micros1, micros2, status);
+        return;
+    }
+    formatterImpl1.format(data.quantity1, data.string, status);
+}
+
+
+/**
+ * Produces the output for the "approximately" case.
+ *
+ * When fSameFormatters is false, this falls back to formatRange. Otherwise the first quantity is
+ * formatted with formatterImpl1 and a '~' code point is inserted at the start of data.string.
+ */
+void NumberRangeFormatterImpl::formatApproximately(UFormattedNumberRangeData& data,
+                                                   MicroProps& micros1, MicroProps& micros2,
+                                                   UErrorCode& status) const {
+    if (!fSameFormatters) {
+        formatRange(data, micros1, micros2, status);
+        return;
+    }
+
+    // FIXME
+    formatterImpl1.format(data.quantity1, data.string, status);
+    data.string.insertCodePoint(0, u'~', UNUM_FIELD_COUNT, status);
+}
+
+
+/**
+ * Writes both numbers into data.string with a separator between them, then applies the inner,
+ * middle, and outer modifiers. A modifier is "collapsed" when it is applied once around the whole
+ * range instead of separately around each number.
+ *
+ * @param data Supplies quantity1 and quantity2; the output is written to data.string.
+ * @param micros1 Pre-processed properties for the first number, including its three modifiers.
+ * @param micros2 Pre-processed properties for the second number, including its three modifiers.
+ * @param status Error code passed to the string builder and modifier calls.
+ */
+void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
+                                           MicroProps& micros1, MicroProps& micros2,
+                                           UErrorCode& status) const {
+
+    // modInner is always notation (scientific); collapsable in ALL.
+    // modOuter is always units; collapsable in ALL, AUTO, and UNIT.
+    // modMiddle could be either; collapsable in ALL and sometimes AUTO and UNIT.
+    // Never collapse a modifier unless every modifier outside of it is collapsed as well.
+    bool collapseOuter = false;
+    bool collapseMiddle = false;
+    bool collapseInner = false;
+    switch (fCollapse) {
+        case UNUM_RANGE_COLLAPSE_ALL:
+        case UNUM_RANGE_COLLAPSE_AUTO:
+        case UNUM_RANGE_COLLAPSE_UNIT:
+        {
+            // OUTER MODIFIER
+            collapseOuter = *micros1.modOuter == *micros2.modOuter;
+            if (!collapseOuter) {
+                // Never collapse middle or inner mods if the outer mods are not collapsable.
+                break;
+            }
+
+            // MIDDLE MODIFIER
+            collapseMiddle = *micros1.modMiddle == *micros2.modMiddle;
+            if (!collapseMiddle) {
+                // Never collapse inner mods if the middle mods are not collapsable.
+                break;
+            }
+
+            // MIDDLE MODIFIER HEURISTICS
+            // (could disable collapsing of the middle modifier)
+            // The modifiers are equal by this point, so we can look at just one of them.
+            // modMiddle was already dereferenced for the comparison above, so it is not
+            // checked for null again here.
+            const Modifier* mm = micros1.modMiddle;
+            if (fCollapse == UNUM_RANGE_COLLAPSE_UNIT) {
+                // Only collapse if the modifier is a unit.
+                // TODO: Handle case where the modifier has both notation and unit (compact currency)?
+                if (!mm->containsField(UNUM_CURRENCY_FIELD) && !mm->containsField(UNUM_PERCENT_FIELD)) {
+                    collapseMiddle = false;
+                }
+            } else if (fCollapse == UNUM_RANGE_COLLAPSE_AUTO) {
+                // Heuristic as of ICU 63: collapse only if the modifier is exactly one code point.
+                if (mm->getCodePointCount(status) != 1) {
+                    collapseMiddle = false;
+                }
+            }
+
+            if (!collapseMiddle || fCollapse != UNUM_RANGE_COLLAPSE_ALL) {
+                // The inner modifier is only ever collapsed in ALL mode.
+                break;
+            }
+
+            // INNER MODIFIER
+            collapseInner = *micros1.modInner == *micros2.modInner;
+
+            // All done checking for collapsability.
+            break;
+        }
+
+        default:
+            // No collapsing: all three flags stay false.
+            break;
+    }
+
+    // The string is laid out as [number 1][shared][number 2]. The three running lengths are
+    // updated after every insertion, and the index macros are re-evaluated at each use so that
+    // they always describe the current layout. They are parenthesized so that they expand safely
+    // inside any expression.
+    NumberStringBuilder& string = data.string;
+    int32_t length1 = 0;
+    int32_t lengthShared = 0;
+    int32_t length2 = 0;
+    #define UPRV_INDEX_0 (0)
+    #define UPRV_INDEX_1 (length1)
+    #define UPRV_INDEX_2 (length1 + lengthShared)
+    #define UPRV_INDEX_3 (length1 + lengthShared + length2)
+
+    // TODO: Use localized pattern
+    lengthShared += string.insert(UPRV_INDEX_0, u" --- ", UNUM_FIELD_COUNT, status);
+    // The first number goes before the separator and the second number goes after it.
+    length1 += NumberFormatterImpl::writeNumber(micros1, data.quantity1, string, UPRV_INDEX_0, status);
+    length2 += NumberFormatterImpl::writeNumber(micros2, data.quantity2, string, UPRV_INDEX_2, status);
+
+    // TODO: Support padding?
+
+    // Modifiers are applied from the inside out. Once one level is collapsed, every level after
+    // it is collapsed too, so from then on only the total length matters; a collapsed modifier's
+    // length is therefore simply added to lengthShared. When a level is not collapsed, each
+    // number gets its own modifier, and the lengths are accumulated so that the indices used for
+    // the second number account for what was inserted around the first.
+    if (collapseInner) {
+        lengthShared += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
+    } else {
+        length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
+        length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
+    }
+
+    if (collapseMiddle) {
+        lengthShared += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
+    } else {
+        length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
+        length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
+    }
+
+    if (collapseOuter) {
+        lengthShared += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
+    } else {
+        length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
+        length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
+    }
+
+    // Keep the helper macros from leaking into the rest of the translation unit.
+    #undef UPRV_INDEX_0
+    #undef UPRV_INDEX_1
+    #undef UPRV_INDEX_2
+    #undef UPRV_INDEX_3
+}



--- a/icu4c/source/i18n/numrange_types.h
+++ b/icu4c/source/i18n/numrange_types.h
@ -11,6 +11,7 @@
 #include "unicode/numberrangeformatter.h"
 #include "number_types.h"
 #include "number_decimalquantity.h"
+#include "number_formatimpl.h"
 #include "number_stringbuilder.h"

 U_NAMESPACE_BEGIN namespace number {
@ -36,12 +37,38 @@ struct UFormattedNumberRangeData : public UMemory {
    DecimalQuantity quantity1;
    DecimalQuantity quantity2;
    NumberStringBuilder string;
-    UNumberRangeIdentityResult identityResult = UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING;
+    UNumberRangeIdentityResult identityResult = UNUM_IDENTITY_RESULT_COUNT;

    // No C conversion methods (no C API yet)
 };


+/**
+ * Implementation class that formats a pair of quantities as a number range.
+ *
+ * NOTE(review): no constructor is declared here and the data members have no initializers;
+ * how instances are meant to be populated is not visible in this change.
+ */
+class NumberRangeFormatterImpl : public UMemory {
+  public:
+    /**
+     * Pre-processes data.quantity1 and data.quantity2, stores the comparison outcome in
+     * data.identityResult, and writes the formatted output to data.string.
+     *
+     * @param equalBeforeRounding When true, the identity result is recorded as
+     *     UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING.
+     */
+    void format(UFormattedNumberRangeData& data, bool equalBeforeRounding, UErrorCode& status) const;
+
+  private:
+    // Formatter applied to the first number of the range.
+    NumberFormatterImpl formatterImpl1;
+    // Formatter applied to the second number of the range.
+    NumberFormatterImpl formatterImpl2;
+    // When true, the single-value and approximately paths format only the first quantity with
+    // formatterImpl1; when false, they fall back to formatRange.
+    // NOTE(review): presumably set when both formatters are configured identically -- confirm.
+    bool fSameFormatters;
+
+    // Selects which modifiers formatRange may apply once around the whole range.
+    UNumberRangeCollapse fCollapse;
+    // Selects among range, approximately, and single-value output, together with the identity
+    // result computed in format().
+    UNumberRangeIdentityFallback fIdentityFallback;
+
+    /** Writes a single number when fSameFormatters is true; otherwise delegates to formatRange. */
+    void formatSingleValue(UFormattedNumberRangeData& data,
+                           MicroProps& micros1, MicroProps& micros2,
+                           UErrorCode& status) const;
+
+    /** Writes a '~'-prefixed number when fSameFormatters is true; otherwise delegates to formatRange. */
+    void formatApproximately(UFormattedNumberRangeData& data,
+                             MicroProps& micros1, MicroProps& micros2,
+                             UErrorCode& status) const;
+
+    /** Writes both numbers with a separator and applies the modifiers found in micros1 and micros2. */
+    void formatRange(UFormattedNumberRangeData& data,
+                     MicroProps& micros1, MicroProps& micros2,
+                     UErrorCode& status) const;
+};
+
+
 } // namespace impl
 } // namespace number
 U_NAMESPACE_END
--- a/icu4c/source/i18n/unicode/numberformatter.h
+++ b/icu4c/source/i18n/unicode/numberformatter.h
@ -144,6 +144,7 @@ class MultiplierFormatHandler;
 class CurrencySymbols;
 class GeneratorHelpers;
 class DecNum;
+class NumberRangeFormatterImpl;

 } // namespace impl

@ -2188,6 +2189,9 @@ class U_I18N_API UnlocalizedNumberFormatter

    // To give NumberFormatter::with() access to this class's constructor:
    friend class NumberFormatter;
+
+    // Give NumberRangeFormatter access to the MacroProps
+    friend class NumberRangeFormatterImpl;
 };

 /**
--- a/icu4c/source/i18n/unicode/numberrangeformatter.h
+++ b/icu4c/source/i18n/unicode/numberrangeformatter.h
@ -148,7 +148,16 @@ typedef enum UNumberRangeIdentityResult {
     * @draft ICU 63
     * @see NumberRangeFormatter
     */
-    UNUM_IDENTITY_RESULT_NOT_EQUAL
+    UNUM_IDENTITY_RESULT_NOT_EQUAL,
+
+#ifndef U_HIDE_INTERNAL_API
+    /**
+     * The number of entries in this enum.
+     * @internal
+     */
+    UNUM_IDENTITY_RESULT_COUNT
+#endif
+
 } UNumberRangeIdentityResult;

 U_NAMESPACE_BEGIN