From 21f74b369898a3d63e01ca4b8590ed630094e657 Mon Sep 17 00:00:00 2001 From: Rich Gillam Date: Wed, 30 Aug 2023 19:38:29 -0400 Subject: [PATCH] ICU-22455 Implemented algorithm in CLDR-16981 to preserve regional unit overrides when they don't conflict with the ms subtag. --- icu4c/source/i18n/units_data.cpp | 90 +++++++++++-------- icu4c/source/i18n/units_data.h | 1 + icu4c/source/test/intltest/numbertest.h | 1 + icu4c/source/test/intltest/numbertest_api.cpp | 2 +- .../test/intltest/numbertest_skeletons.cpp | 50 +++++++++++ .../test/number/NumberFormatterApiTest.java | 2 +- .../ibm/icu/impl/units/ConversionRates.java | 21 ++++- .../ibm/icu/impl/units/UnitPreferences.java | 52 +++++------ .../dev/test/number/NumberSkeletonTest.java | 44 +++++++++ 9 files changed, 196 insertions(+), 67 deletions(-) diff --git a/icu4c/source/i18n/units_data.cpp b/icu4c/source/i18n/units_data.cpp index 801ede88378..b1e069660f2 100644 --- a/icu4c/source/i18n/units_data.cpp +++ b/icu4c/source/i18n/units_data.cpp @@ -7,9 +7,11 @@ #include "bytesinkutil.h" #include "cstring.h" +#include "measunit_impl.h" #include "number_decimalquantity.h" #include "resource.h" #include "uassert.h" +#include "ulocimp.h" #include "unicode/locid.h" #include "unicode/unistr.h" #include "unicode/ures.h" @@ -78,6 +80,7 @@ class ConversionRateDataSink : public ResourceSink { UnicodeString baseUnit = ICU_Utility::makeBogusString(); UnicodeString factor = ICU_Utility::makeBogusString(); UnicodeString offset = ICU_Utility::makeBogusString(); + UnicodeString systems = ICU_Utility::makeBogusString(); for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) { if (uprv_strcmp(key, "target") == 0) { baseUnit = value.getUnicodeString(status); @@ -85,6 +88,8 @@ class ConversionRateDataSink : public ResourceSink { factor = value.getUnicodeString(status); } else if (uprv_strcmp(key, "offset") == 0) { offset = value.getUnicodeString(status); + } else if (uprv_strcmp(key, "systems") == 0) { + systems = value.getUnicodeString(status); } } if (U_FAILURE(status)) { return; } @@ -103,6 +108,7 @@ class ConversionRateDataSink : public ResourceSink { cr->sourceUnit.append(srcUnit, status); cr->baseUnit.appendInvariantChars(baseUnit, status); cr->factor.appendInvariantChars(factor, status); + cr->systems.appendInvariantChars(systems, status); trimSpaces(cr->factor, status); if (!offset.isBogus()) cr->offset.appendInvariantChars(offset, status); } @@ -431,46 +437,16 @@ MaybeStackVector } } - CharString region(locale.getCountry(), status); - + char regionBuf[8]; + ulocimp_getRegionForSupplementalData(locale.getName(), false, regionBuf, 8, &status); + CharString region(regionBuf, status); + // Check the locale system tag, e.g `ms=metric`. UErrorCode internalMeasureTagStatus = U_ZERO_ERROR; CharString localeSystem = getKeyWordValue(locale, "measure", internalMeasureTagStatus); bool isLocaleSystem = false; - if (U_SUCCESS(internalMeasureTagStatus)) { - if (localeSystem == "metric") { - region.clear(); - region.append("001", status); - isLocaleSystem = true; - } else if (localeSystem == "ussystem") { - region.clear(); - region.append("US", status); - isLocaleSystem = true; - } else if (localeSystem == "uksystem") { - region.clear(); - region.append("GB", status); - isLocaleSystem = true; - } - } - - // Check the region tag, e.g. `rg=uszzz`. - if (!isLocaleSystem) { - UErrorCode internalRgTagStatus = U_ZERO_ERROR; - CharString localeRegion = getKeyWordValue(locale, "rg", internalRgTagStatus); - if (U_SUCCESS(internalRgTagStatus) && localeRegion.length() >= 3) { - if (localeRegion == "default") { - region.clear(); - region.append(localeRegion, status); - } else if (localeRegion[0] >= '0' && localeRegion[0] <= '9') { - region.clear(); - region.append(localeRegion.data(), 3, status); - } else { - // Take the first two character and capitalize them. - region.clear(); - region.append(uprv_toupper(localeRegion[0]), status); - region.append(uprv_toupper(localeRegion[1]), status); - } - } + if (U_SUCCESS(internalMeasureTagStatus) && (localeSystem == "metric" || localeSystem == "ussystem" || localeSystem == "uksystem")) { + isLocaleSystem = true; } int32_t idx = @@ -481,6 +457,48 @@ MaybeStackVector U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`. const UnitPreferenceMetadata *m = metadata_[idx]; + + if (isLocaleSystem) { + // if the locale ID specifies a measurment system, check if ALL of the units we got back + // are members of that system (or are "metric_adjacent", which we consider to match all + // the systems) + bool unitsMatchSystem = true; + ConversionRates rates(status); + for (int32_t i = 0; unitsMatchSystem && i < m->prefsCount; i++) { + const UnitPreference& unitPref = *(unitPrefs_[i + m->prefsOffset]); + MeasureUnitImpl measureUnit = MeasureUnitImpl::forIdentifier(unitPref.unit.data(), status); + for (int32_t j = 0; unitsMatchSystem && j < measureUnit.singleUnits.length(); j++) { + const SingleUnitImpl* singleUnit = measureUnit.singleUnits[j]; + const ConversionRateInfo* rateInfo = rates.extractConversionInfo(singleUnit->getSimpleUnitID(), status); + CharString systems(rateInfo->systems, status); + if (!systems.contains("metric_adjacent")) { // "metric-adjacent" is considered to match all the locale systems + if (!systems.contains(localeSystem.data())) { + unitsMatchSystem = false; + } + } + } + } + + // if any of the units we got back above don't match the mearurement system the locale ID asked for, + // throw out the region and just load the units for the base region for the requested measurement system + if (!unitsMatchSystem) { + region.clear(); + if (localeSystem == "ussystem") { + region.append("US", status); + } else if (localeSystem == "uksystem") { + region.append("GB", status); + } else { + region.append("001", status); + } + idx = getPreferenceMetadataIndex(&metadata_, category, usage, region.toStringPiece(), status); + if (U_FAILURE(status)) { + return result; + } + + m = metadata_[idx]; + } + } + for (int32_t i = 0; i < m->prefsCount; i++) { result.emplaceBackAndCheckErrorCode(status, *(unitPrefs_[i + m->prefsOffset])); } diff --git a/icu4c/source/i18n/units_data.h b/icu4c/source/i18n/units_data.h index 118458ecca2..ad5033513cb 100644 --- a/icu4c/source/i18n/units_data.h +++ b/icu4c/source/i18n/units_data.h @@ -41,6 +41,7 @@ class U_I18N_API ConversionRateInfo : public UMemory { CharString baseUnit; CharString factor; CharString offset; + CharString systems; }; } // namespace units diff --git a/icu4c/source/test/intltest/numbertest.h b/icu4c/source/test/intltest/numbertest.h index 8db27020a38..1ad8b28da75 100644 --- a/icu4c/source/test/intltest/numbertest.h +++ b/icu4c/source/test/intltest/numbertest.h @@ -302,6 +302,7 @@ class NumberSkeletonTest : public IntlTest { void wildcardCharacters(); void perUnitInArabic(); void perUnitToSkeleton(); + void measurementSystemOverride(); void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0) override; diff --git a/icu4c/source/test/intltest/numbertest_api.cpp b/icu4c/source/test/intltest/numbertest_api.cpp index b9316d61cfe..4146247ff29 100644 --- a/icu4c/source/test/intltest/numbertest_api.cpp +++ b/icu4c/source/test/intltest/numbertest_api.cpp @@ -2996,7 +2996,7 @@ void NumberFormatterApiTest::unitLocaleTags() { nullptr, "celsius", 0.0, "0 degrees Celsius"}, {"Test the locale with mu = fahrenheit and with usage", "en-US-u-mu-fahrenheit", "celsius", 0, "default", "fahrenheit", 32.0, "32 degrees Fahrenheit"}, - {u"Test the locale with rg = UKOI and with usage", "en-US-u-rg-ukoizzzz", "fahrenheit", 0, + {u"Test the locale with rg = UKOI and with usage", "en-US-u-rg-ukoi", "fahrenheit", 0, "default", "celsius", -18.0, u"-18 degrees Celsius"}, // Test the priorities diff --git a/icu4c/source/test/intltest/numbertest_skeletons.cpp b/icu4c/source/test/intltest/numbertest_skeletons.cpp index f09fb60c8c6..1d9259b3d50 100644 --- a/icu4c/source/test/intltest/numbertest_skeletons.cpp +++ b/icu4c/source/test/intltest/numbertest_skeletons.cpp @@ -32,6 +32,7 @@ void NumberSkeletonTest::runIndexedTest(int32_t index, UBool exec, const char*& TESTCASE_AUTO(wildcardCharacters); TESTCASE_AUTO(perUnitInArabic); TESTCASE_AUTO(perUnitToSkeleton); + TESTCASE_AUTO(measurementSystemOverride); TESTCASE_AUTO_END; } @@ -507,4 +508,53 @@ void NumberSkeletonTest::perUnitToSkeleton() { } } +void NumberSkeletonTest::measurementSystemOverride() { + // NOTE TO REVIEWERS: When the appropriate changes are made on the CLDR side, do we want to keep this + // test or rely on additions the CLDR project makes to unitPreferencesTest.txt? --rtg 8/29/23 + IcuTestErrorCode status(*this, "measurementSystemOverride"); + struct TestCase { + const char* locale; + const char16_t* skeleton; + const char16_t* expectedResult; + } testCases[] = { + // Norway uses m/s for wind speed and should with or without the "ms-metric" subtag in the locale, + // but it uses km/h for other speeds. France uses km/h for all speeds. And in both places, if + // you say "ms-ussystem", you should get mph. In the US, we use mph for all speeds, but should + // use km/h if the locale has "ms-metric" in it. + { "nn_NO", u"unit/kilometer-per-hour usage/wind", u"0,34 m/s" }, + { "nn_NO@measure=metric", u"unit/kilometer-per-hour usage/wind", u"0,34 m/s" }, + { "nn_NO@measure=ussystem", u"unit/kilometer-per-hour usage/wind", u"0,76 mile/t" }, + { "fr_FR", u"unit/kilometer-per-hour usage/wind", u"1,2\u202Fkm/h" }, + { "fr_FR@measure=metric", u"unit/kilometer-per-hour usage/wind", u"1,2\u202Fkm/h" }, + { "fr_FR@measure=ussystem", u"unit/kilometer-per-hour usage/wind", u"0,76\u202Fmi/h" }, + { "en_US", u"unit/kilometer-per-hour usage/wind", u"0.76 mph" }, + { "en_US@measure=metric", u"unit/kilometer-per-hour usage/wind", u"1.2 km/h" }, + { "en_US@measure=ussystem", u"unit/kilometer-per-hour usage/wind", u"0.76 mph" }, + + { "nn_NO", u"unit/kilometer-per-hour usage/default", u"1,2 km/t" }, + { "nn_NO@measure=metric", u"unit/kilometer-per-hour usage/default", u"1,2 km/t" }, + { "nn_NO@measure=ussystem", u"unit/kilometer-per-hour usage/default", u"0,76 mile/t" }, + { "fr_FR", u"unit/kilometer-per-hour usage/default", u"1,2\u202Fkm/h" }, + { "fr_FR@measure=metric", u"unit/kilometer-per-hour usage/default", u"1,2\u202Fkm/h" }, + { "fr_FR@measure=ussystem", u"unit/kilometer-per-hour usage/default", u"0,76\u202Fmi/h" }, + { "en_US", u"unit/kilometer-per-hour usage/default", u"0.76 mph" }, + { "en_US@measure=metric", u"unit/kilometer-per-hour usage/default", u"1.2 km/h" }, + { "en_US@measure=ussystem", u"unit/kilometer-per-hour usage/default", u"0.76 mph" }, + }; + + for (const auto& testCase : testCases) { + UErrorCode err = U_ZERO_ERROR; + LocalizedNumberFormatter nf = NumberFormatter::forSkeleton(testCase.skeleton, err).locale(testCase.locale); + UnicodeString actualResult = nf.formatDouble(1.23, err).toString(err); + + UnicodeString errorMessage = ": "; + errorMessage += testCase.locale; + errorMessage += "/"; + errorMessage += testCase.skeleton; + if (assertSuccess(u"Formatting error" + errorMessage, err)) { + assertEquals(u"Wrong result" + errorMessage, testCase.expectedResult, actualResult); + } + } +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java b/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java index 5005ac603f9..361f554de65 100644 --- a/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java +++ b/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java @@ -513,7 +513,7 @@ public class NumberFormatterApiTest extends TestFmwk { // Test the behaviour of the `rg` tag {"Test the locale with rg = UK and without usage", "en-US-u-rg-ukzzzz", "fahrenheit", "0", null, "fahrenheit", "0.0", "0 degrees Fahrenheit"}, {"Test the locale with rg = UK and with usage", "en-US-u-rg-ukzzzz", "fahrenheit", "0", "default", "celsius", "-18", "-18 degrees Celsius"}, - {"Test the locale with rg = UKOI and with usage", "en-US-u-rg-ukoizzzz", "fahrenheit", "0", "default", "celsius", "-18" , "-18 degrees Celsius"}, + {"Test the locale with rg = UKOI and with usage", "en-US-u-rg-ukoi", "fahrenheit", "0", "default", "celsius", "-18" , "-18 degrees Celsius"}, // Test the priorities {"Test the locale with mu,ms,rg --> mu tag wins", "en-US-u-mu-celsius-ms-ussystem-rg-uszzzz", "celsius", "0", "default", "celsius", "0.0", "0 degrees Celsius"}, diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/impl/units/ConversionRates.java b/icu4j/main/core/src/main/java/com/ibm/icu/impl/units/ConversionRates.java index fe348656229..bbc5be26703 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/impl/units/ConversionRates.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/impl/units/ConversionRates.java @@ -119,6 +119,13 @@ public class ConversionRates { return targetImpl.getSingleUnits(); } + /** + * @return The measurement systems for the specified unit. + */ + public String extractSystems(SingleUnitImpl singleUnit) { + return mapToConversionRate.get(singleUnit.getSimpleUnitID()).getSystems(); + } + /** * Checks if the {@code MeasureUnitImpl} is simple or not. * @@ -155,6 +162,7 @@ public class ConversionRates { String target = null; String factor = null; String offset = "0"; + String systems = null; for (int j = 0; simpleUnitConversionInfo.getKeyAndValue(j, key, value); j++) { assert (value.getType() == UResourceBundle.STRING); @@ -168,7 +176,7 @@ public class ConversionRates { } else if ("offset".equals(keyString)) { offset = valueString; } else if ("systems".equals(keyString)) { - // just ignore for time being + systems = value.toString(); // still want the spaces here } else { assert false : "The key must be target, factor, systems or offset"; } @@ -178,7 +186,7 @@ public class ConversionRates { assert (target != null); assert (factor != null); - mapToConversionRate.put(simpleUnit, new ConversionRateInfo(simpleUnit, target, factor, offset)); + mapToConversionRate.put(simpleUnit, new ConversionRateInfo(simpleUnit, target, factor, offset, systems)); } @@ -196,12 +204,14 @@ public class ConversionRates { private final String target; private final String conversionRate; private final BigDecimal offset; + private final String systems; - public ConversionRateInfo(String simpleUnit, String target, String conversionRate, String offset) { + public ConversionRateInfo(String simpleUnit, String target, String conversionRate, String offset, String systems) { this.simpleUnit = simpleUnit; this.target = target; this.conversionRate = conversionRate; this.offset = forNumberWithDivision(offset); + this.systems = systems; } private static BigDecimal forNumberWithDivision(String numberWithDivision) { @@ -238,5 +248,10 @@ public class ConversionRates { public String getConversionRate() { return conversionRate; } + + /** + * @return The measurement systems this unit belongs to. + */ + public String getSystems() { return systems; } } } diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/impl/units/UnitPreferences.java b/icu4j/main/core/src/main/java/com/ibm/icu/impl/units/UnitPreferences.java index 39d6b0ac151..43fca24c10a 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/impl/units/UnitPreferences.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/impl/units/UnitPreferences.java @@ -3,15 +3,12 @@ package com.ibm.icu.impl.units; import java.math.BigDecimal; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Locale; -import java.util.Map; +import java.util.*; import com.ibm.icu.impl.ICUData; import com.ibm.icu.impl.ICUResourceBundle; import com.ibm.icu.impl.UResource; +import com.ibm.icu.util.MeasureUnit; import com.ibm.icu.util.ULocale; import com.ibm.icu.util.UResourceBundle; @@ -92,36 +89,39 @@ public class UnitPreferences { } } - String region = locale.getCountry(); + String region = ULocale.getRegionForSupplementalData(locale, false); // Check the locale system tag, e.g `ms=metric`. String localeSystem = locale.getKeywordValue("measure"); - boolean isLocaleSystem = false; - if (measurementSystem.containsKey(localeSystem)) { - isLocaleSystem = true; - region = measurementSystem.get(localeSystem); - } - - // Check the region tag, e.g. `rg=uszzz`. - if (!isLocaleSystem) { - String localeRegion = locale.getKeywordValue("rg"); - if (localeRegion != null && localeRegion.length() >= 3) { - if (localeRegion.equals("default")) { - region = localeRegion; - } else if (Character.isDigit(localeRegion.charAt(0))) { - region = localeRegion.substring(0, 3); // e.g. 001 - } else { - // Capitalize the first two character of the region, e.g. ukzzzz or usca - region = localeRegion.substring(0, 2).toUpperCase(Locale.ROOT); - } - } - } + boolean isLocaleSystem = measurementSystem.containsKey(localeSystem); String[] subUsages = getAllUsages(usage); UnitPreference[] result = null; for (String subUsage : subUsages) { result = getUnitPreferences(category, subUsage, region); + + if (result != null && isLocaleSystem) { + ConversionRates rates = new ConversionRates(); + boolean unitsMatchSystem = true; + for (UnitPreference unitPref : result) { + MeasureUnitImpl measureUnit = MeasureUnitImpl.forIdentifier(unitPref.getUnit()); + List singleUnits = new ArrayList<>(measureUnit.getSingleUnits()); + for (SingleUnitImpl singleUnit : singleUnits) { + String systems = rates.extractSystems(singleUnit); + if (!systems.contains("metric_adjacent")) { + if (!systems.contains(localeSystem)) { + unitsMatchSystem = false; + } + } + } + } + if (!unitsMatchSystem) { + String newRegion = measurementSystem.get(localeSystem); + result = getUnitPreferences(category, subUsage, newRegion); + } + } + if (result != null) break; } diff --git a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/number/NumberSkeletonTest.java b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/number/NumberSkeletonTest.java index 6b99583a1c8..515e75fd5ab 100644 --- a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/number/NumberSkeletonTest.java +++ b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/number/NumberSkeletonTest.java @@ -7,7 +7,9 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.math.RoundingMode; +import java.util.Locale; +import com.ibm.icu.number.LocalizedNumberFormatter; import org.junit.Test; import com.ibm.icu.number.NumberFormatter; @@ -459,4 +461,46 @@ public class NumberSkeletonTest { } } } + + @Test + public void measurementSystemOverride() { + // NOTE TO REVIEWERS: When the appropriate changes are made on the CLDR side, do we want to keep this + // test or rely on additions the CLDR project makes to unitPreferencesTest.txt? --rtg 8/29/23 + String[][] testCases = { + // Norway uses m/s for wind speed and should with or without the "ms-metric" subtag in the locale, + // but it uses km/h for other speeds. France uses km/h for all speeds. And in both places, if + // you say "ms-ussystem", you should get mph. In the US, we use mph for all speeds, but should + // use km/h if the locale has "ms-metric" in it. + { "nn-NO", "unit/kilometer-per-hour usage/wind", "0,34 m/s" }, + { "nn-NO-u-ms-metric", "unit/kilometer-per-hour usage/wind", "0,34 m/s" }, + { "nn-NO-u-ms-ussystem", "unit/kilometer-per-hour usage/wind", "0,76 mile/t" }, + { "fr-FR", "unit/kilometer-per-hour usage/wind", "1,2\u202Fkm/h" }, + { "fr-FR-u-ms-metric", "unit/kilometer-per-hour usage/wind", "1,2\u202Fkm/h" }, + { "fr-FR-u-ms-ussystem", "unit/kilometer-per-hour usage/wind", "0,76\u202Fmi/h" }, + { "en-US", "unit/kilometer-per-hour usage/wind", "0.76 mph" }, + { "en-US-u-ms-metric", "unit/kilometer-per-hour usage/wind", "1.2 km/h" }, + { "en-US-u-ms-ussystem", "unit/kilometer-per-hour usage/wind", "0.76 mph" }, + + { "nn-NO", "unit/kilometer-per-hour usage/default", "1,2 km/t" }, + { "nn-NO-u-ms-metric", "unit/kilometer-per-hour usage/default", "1,2 km/t" }, + { "nn-NO-u-ms-ussystem", "unit/kilometer-per-hour usage/default", "0,76 mile/t" }, + { "fr-FR", "unit/kilometer-per-hour usage/default", "1,2\u202Fkm/h" }, + { "fr-FR-u-ms-metric", "unit/kilometer-per-hour usage/default", "1,2\u202Fkm/h" }, + { "fr-FR-u-ms-ussystem", "unit/kilometer-per-hour usage/default", "0,76\u202Fmi/h" }, + { "en-US", "unit/kilometer-per-hour usage/default", "0.76 mph" }, + { "en-US-u-ms-metric", "unit/kilometer-per-hour usage/default", "1.2 km/h" }, + { "en-US-u-ms-ussystem", "unit/kilometer-per-hour usage/default", "0.76 mph" }, + }; + + for (String[] testCase : testCases) { + String languageTag = testCase[0]; + String skeleton = testCase[1]; + String expectedResult = testCase[2]; + + LocalizedNumberFormatter nf = NumberFormatter.forSkeleton(skeleton).locale(Locale.forLanguageTag(languageTag)); + String actualResult = nf.format(1.23).toString(); + + assertEquals("Wrong result: " + languageTag + ":" + skeleton, expectedResult, actualResult); + } + } }