ICU-22455 Implemented algorithm in CLDR-16981 to preserve regional unit overrides when they don't conflict

with the ms subtag.
This commit is contained in:
Rich Gillam 2023-08-30 19:38:29 -04:00 committed by Rich Gillam
parent aa70ba6746
commit 21f74b3698
9 changed files with 196 additions and 67 deletions

View file

@ -7,9 +7,11 @@
#include "bytesinkutil.h"
#include "cstring.h"
#include "measunit_impl.h"
#include "number_decimalquantity.h"
#include "resource.h"
#include "uassert.h"
#include "ulocimp.h"
#include "unicode/locid.h"
#include "unicode/unistr.h"
#include "unicode/ures.h"
@ -78,6 +80,7 @@ class ConversionRateDataSink : public ResourceSink {
UnicodeString baseUnit = ICU_Utility::makeBogusString();
UnicodeString factor = ICU_Utility::makeBogusString();
UnicodeString offset = ICU_Utility::makeBogusString();
UnicodeString systems = ICU_Utility::makeBogusString();
for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) {
if (uprv_strcmp(key, "target") == 0) {
baseUnit = value.getUnicodeString(status);
@ -85,6 +88,8 @@ class ConversionRateDataSink : public ResourceSink {
factor = value.getUnicodeString(status);
} else if (uprv_strcmp(key, "offset") == 0) {
offset = value.getUnicodeString(status);
} else if (uprv_strcmp(key, "systems") == 0) {
systems = value.getUnicodeString(status);
}
}
if (U_FAILURE(status)) { return; }
@ -103,6 +108,7 @@ class ConversionRateDataSink : public ResourceSink {
cr->sourceUnit.append(srcUnit, status);
cr->baseUnit.appendInvariantChars(baseUnit, status);
cr->factor.appendInvariantChars(factor, status);
cr->systems.appendInvariantChars(systems, status);
trimSpaces(cr->factor, status);
if (!offset.isBogus()) cr->offset.appendInvariantChars(offset, status);
}
@ -431,46 +437,16 @@ MaybeStackVector<UnitPreference>
}
}
CharString region(locale.getCountry(), status);
char regionBuf[8];
ulocimp_getRegionForSupplementalData(locale.getName(), false, regionBuf, 8, &status);
CharString region(regionBuf, status);
// Check the locale system tag, e.g `ms=metric`.
UErrorCode internalMeasureTagStatus = U_ZERO_ERROR;
CharString localeSystem = getKeyWordValue(locale, "measure", internalMeasureTagStatus);
bool isLocaleSystem = false;
if (U_SUCCESS(internalMeasureTagStatus)) {
if (localeSystem == "metric") {
region.clear();
region.append("001", status);
isLocaleSystem = true;
} else if (localeSystem == "ussystem") {
region.clear();
region.append("US", status);
isLocaleSystem = true;
} else if (localeSystem == "uksystem") {
region.clear();
region.append("GB", status);
isLocaleSystem = true;
}
}
// Check the region tag, e.g. `rg=uszzz`.
if (!isLocaleSystem) {
UErrorCode internalRgTagStatus = U_ZERO_ERROR;
CharString localeRegion = getKeyWordValue(locale, "rg", internalRgTagStatus);
if (U_SUCCESS(internalRgTagStatus) && localeRegion.length() >= 3) {
if (localeRegion == "default") {
region.clear();
region.append(localeRegion, status);
} else if (localeRegion[0] >= '0' && localeRegion[0] <= '9') {
region.clear();
region.append(localeRegion.data(), 3, status);
} else {
// Take the first two character and capitalize them.
region.clear();
region.append(uprv_toupper(localeRegion[0]), status);
region.append(uprv_toupper(localeRegion[1]), status);
}
}
if (U_SUCCESS(internalMeasureTagStatus) && (localeSystem == "metric" || localeSystem == "ussystem" || localeSystem == "uksystem")) {
isLocaleSystem = true;
}
int32_t idx =
@ -481,6 +457,48 @@ MaybeStackVector<UnitPreference>
U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`.
const UnitPreferenceMetadata *m = metadata_[idx];
if (isLocaleSystem) {
// if the locale ID specifies a measurment system, check if ALL of the units we got back
// are members of that system (or are "metric_adjacent", which we consider to match all
// the systems)
bool unitsMatchSystem = true;
ConversionRates rates(status);
for (int32_t i = 0; unitsMatchSystem && i < m->prefsCount; i++) {
const UnitPreference& unitPref = *(unitPrefs_[i + m->prefsOffset]);
MeasureUnitImpl measureUnit = MeasureUnitImpl::forIdentifier(unitPref.unit.data(), status);
for (int32_t j = 0; unitsMatchSystem && j < measureUnit.singleUnits.length(); j++) {
const SingleUnitImpl* singleUnit = measureUnit.singleUnits[j];
const ConversionRateInfo* rateInfo = rates.extractConversionInfo(singleUnit->getSimpleUnitID(), status);
CharString systems(rateInfo->systems, status);
if (!systems.contains("metric_adjacent")) { // "metric-adjacent" is considered to match all the locale systems
if (!systems.contains(localeSystem.data())) {
unitsMatchSystem = false;
}
}
}
}
// if any of the units we got back above don't match the mearurement system the locale ID asked for,
// throw out the region and just load the units for the base region for the requested measurement system
if (!unitsMatchSystem) {
region.clear();
if (localeSystem == "ussystem") {
region.append("US", status);
} else if (localeSystem == "uksystem") {
region.append("GB", status);
} else {
region.append("001", status);
}
idx = getPreferenceMetadataIndex(&metadata_, category, usage, region.toStringPiece(), status);
if (U_FAILURE(status)) {
return result;
}
m = metadata_[idx];
}
}
for (int32_t i = 0; i < m->prefsCount; i++) {
result.emplaceBackAndCheckErrorCode(status, *(unitPrefs_[i + m->prefsOffset]));
}

View file

@ -41,6 +41,7 @@ class U_I18N_API ConversionRateInfo : public UMemory {
CharString baseUnit;
CharString factor;
CharString offset;
CharString systems;
};
} // namespace units

View file

@ -302,6 +302,7 @@ class NumberSkeletonTest : public IntlTest {
void wildcardCharacters();
void perUnitInArabic();
void perUnitToSkeleton();
void measurementSystemOverride();
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0) override;

View file

@ -2996,7 +2996,7 @@ void NumberFormatterApiTest::unitLocaleTags() {
nullptr, "celsius", 0.0, "0 degrees Celsius"},
{"Test the locale with mu = fahrenheit and with usage", "en-US-u-mu-fahrenheit", "celsius", 0,
"default", "fahrenheit", 32.0, "32 degrees Fahrenheit"},
{u"Test the locale with rg = UKOI and with usage", "en-US-u-rg-ukoizzzz", "fahrenheit", 0,
{u"Test the locale with rg = UKOI and with usage", "en-US-u-rg-ukoi", "fahrenheit", 0,
"default", "celsius", -18.0, u"-18 degrees Celsius"},
// Test the priorities

View file

@ -32,6 +32,7 @@ void NumberSkeletonTest::runIndexedTest(int32_t index, UBool exec, const char*&
TESTCASE_AUTO(wildcardCharacters);
TESTCASE_AUTO(perUnitInArabic);
TESTCASE_AUTO(perUnitToSkeleton);
TESTCASE_AUTO(measurementSystemOverride);
TESTCASE_AUTO_END;
}
@ -507,4 +508,53 @@ void NumberSkeletonTest::perUnitToSkeleton() {
}
}
void NumberSkeletonTest::measurementSystemOverride() {
// NOTE TO REVIEWERS: When the appropriate changes are made on the CLDR side, do we want to keep this
// test or rely on additions the CLDR project makes to unitPreferencesTest.txt? --rtg 8/29/23
IcuTestErrorCode status(*this, "measurementSystemOverride");
struct TestCase {
const char* locale;
const char16_t* skeleton;
const char16_t* expectedResult;
} testCases[] = {
// Norway uses m/s for wind speed and should with or without the "ms-metric" subtag in the locale,
// but it uses km/h for other speeds. France uses km/h for all speeds. And in both places, if
// you say "ms-ussystem", you should get mph. In the US, we use mph for all speeds, but should
// use km/h if the locale has "ms-metric" in it.
{ "nn_NO", u"unit/kilometer-per-hour usage/wind", u"0,34 m/s" },
{ "nn_NO@measure=metric", u"unit/kilometer-per-hour usage/wind", u"0,34 m/s" },
{ "nn_NO@measure=ussystem", u"unit/kilometer-per-hour usage/wind", u"0,76 mile/t" },
{ "fr_FR", u"unit/kilometer-per-hour usage/wind", u"1,2\u202Fkm/h" },
{ "fr_FR@measure=metric", u"unit/kilometer-per-hour usage/wind", u"1,2\u202Fkm/h" },
{ "fr_FR@measure=ussystem", u"unit/kilometer-per-hour usage/wind", u"0,76\u202Fmi/h" },
{ "en_US", u"unit/kilometer-per-hour usage/wind", u"0.76 mph" },
{ "en_US@measure=metric", u"unit/kilometer-per-hour usage/wind", u"1.2 km/h" },
{ "en_US@measure=ussystem", u"unit/kilometer-per-hour usage/wind", u"0.76 mph" },
{ "nn_NO", u"unit/kilometer-per-hour usage/default", u"1,2 km/t" },
{ "nn_NO@measure=metric", u"unit/kilometer-per-hour usage/default", u"1,2 km/t" },
{ "nn_NO@measure=ussystem", u"unit/kilometer-per-hour usage/default", u"0,76 mile/t" },
{ "fr_FR", u"unit/kilometer-per-hour usage/default", u"1,2\u202Fkm/h" },
{ "fr_FR@measure=metric", u"unit/kilometer-per-hour usage/default", u"1,2\u202Fkm/h" },
{ "fr_FR@measure=ussystem", u"unit/kilometer-per-hour usage/default", u"0,76\u202Fmi/h" },
{ "en_US", u"unit/kilometer-per-hour usage/default", u"0.76 mph" },
{ "en_US@measure=metric", u"unit/kilometer-per-hour usage/default", u"1.2 km/h" },
{ "en_US@measure=ussystem", u"unit/kilometer-per-hour usage/default", u"0.76 mph" },
};
for (const auto& testCase : testCases) {
UErrorCode err = U_ZERO_ERROR;
LocalizedNumberFormatter nf = NumberFormatter::forSkeleton(testCase.skeleton, err).locale(testCase.locale);
UnicodeString actualResult = nf.formatDouble(1.23, err).toString(err);
UnicodeString errorMessage = ": ";
errorMessage += testCase.locale;
errorMessage += "/";
errorMessage += testCase.skeleton;
if (assertSuccess(u"Formatting error" + errorMessage, err)) {
assertEquals(u"Wrong result" + errorMessage, testCase.expectedResult, actualResult);
}
}
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -513,7 +513,7 @@ public class NumberFormatterApiTest extends TestFmwk {
// Test the behaviour of the `rg` tag
{"Test the locale with rg = UK and without usage", "en-US-u-rg-ukzzzz", "fahrenheit", "0", null, "fahrenheit", "0.0", "0 degrees Fahrenheit"},
{"Test the locale with rg = UK and with usage", "en-US-u-rg-ukzzzz", "fahrenheit", "0", "default", "celsius", "-18", "-18 degrees Celsius"},
{"Test the locale with rg = UKOI and with usage", "en-US-u-rg-ukoizzzz", "fahrenheit", "0", "default", "celsius", "-18" , "-18 degrees Celsius"},
{"Test the locale with rg = UKOI and with usage", "en-US-u-rg-ukoi", "fahrenheit", "0", "default", "celsius", "-18" , "-18 degrees Celsius"},
// Test the priorities
{"Test the locale with mu,ms,rg --> mu tag wins", "en-US-u-mu-celsius-ms-ussystem-rg-uszzzz", "celsius", "0", "default", "celsius", "0.0", "0 degrees Celsius"},

View file

@ -119,6 +119,13 @@ public class ConversionRates {
return targetImpl.getSingleUnits();
}
/**
* @return The measurement systems for the specified unit.
*/
public String extractSystems(SingleUnitImpl singleUnit) {
return mapToConversionRate.get(singleUnit.getSimpleUnitID()).getSystems();
}
/**
* Checks if the {@code MeasureUnitImpl} is simple or not.
*
@ -155,6 +162,7 @@ public class ConversionRates {
String target = null;
String factor = null;
String offset = "0";
String systems = null;
for (int j = 0; simpleUnitConversionInfo.getKeyAndValue(j, key, value); j++) {
assert (value.getType() == UResourceBundle.STRING);
@ -168,7 +176,7 @@ public class ConversionRates {
} else if ("offset".equals(keyString)) {
offset = valueString;
} else if ("systems".equals(keyString)) {
// just ignore for time being
systems = value.toString(); // still want the spaces here
} else {
assert false : "The key must be target, factor, systems or offset";
}
@ -178,7 +186,7 @@ public class ConversionRates {
assert (target != null);
assert (factor != null);
mapToConversionRate.put(simpleUnit, new ConversionRateInfo(simpleUnit, target, factor, offset));
mapToConversionRate.put(simpleUnit, new ConversionRateInfo(simpleUnit, target, factor, offset, systems));
}
@ -196,12 +204,14 @@ public class ConversionRates {
private final String target;
private final String conversionRate;
private final BigDecimal offset;
private final String systems;
public ConversionRateInfo(String simpleUnit, String target, String conversionRate, String offset) {
public ConversionRateInfo(String simpleUnit, String target, String conversionRate, String offset, String systems) {
this.simpleUnit = simpleUnit;
this.target = target;
this.conversionRate = conversionRate;
this.offset = forNumberWithDivision(offset);
this.systems = systems;
}
private static BigDecimal forNumberWithDivision(String numberWithDivision) {
@ -238,5 +248,10 @@ public class ConversionRates {
public String getConversionRate() {
return conversionRate;
}
/**
* @return The measurement systems this unit belongs to.
*/
public String getSystems() { return systems; }
}
}

View file

@ -3,15 +3,12 @@
package com.ibm.icu.impl.units;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.*;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.UResource;
import com.ibm.icu.util.MeasureUnit;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.UResourceBundle;
@ -92,36 +89,39 @@ public class UnitPreferences {
}
}
String region = locale.getCountry();
String region = ULocale.getRegionForSupplementalData(locale, false);
// Check the locale system tag, e.g `ms=metric`.
String localeSystem = locale.getKeywordValue("measure");
boolean isLocaleSystem = false;
if (measurementSystem.containsKey(localeSystem)) {
isLocaleSystem = true;
region = measurementSystem.get(localeSystem);
}
// Check the region tag, e.g. `rg=uszzz`.
if (!isLocaleSystem) {
String localeRegion = locale.getKeywordValue("rg");
if (localeRegion != null && localeRegion.length() >= 3) {
if (localeRegion.equals("default")) {
region = localeRegion;
} else if (Character.isDigit(localeRegion.charAt(0))) {
region = localeRegion.substring(0, 3); // e.g. 001
} else {
// Capitalize the first two character of the region, e.g. ukzzzz or usca
region = localeRegion.substring(0, 2).toUpperCase(Locale.ROOT);
}
}
}
boolean isLocaleSystem = measurementSystem.containsKey(localeSystem);
String[] subUsages = getAllUsages(usage);
UnitPreference[] result = null;
for (String subUsage :
subUsages) {
result = getUnitPreferences(category, subUsage, region);
if (result != null && isLocaleSystem) {
ConversionRates rates = new ConversionRates();
boolean unitsMatchSystem = true;
for (UnitPreference unitPref : result) {
MeasureUnitImpl measureUnit = MeasureUnitImpl.forIdentifier(unitPref.getUnit());
List<SingleUnitImpl> singleUnits = new ArrayList<>(measureUnit.getSingleUnits());
for (SingleUnitImpl singleUnit : singleUnits) {
String systems = rates.extractSystems(singleUnit);
if (!systems.contains("metric_adjacent")) {
if (!systems.contains(localeSystem)) {
unitsMatchSystem = false;
}
}
}
}
if (!unitsMatchSystem) {
String newRegion = measurementSystem.get(localeSystem);
result = getUnitPreferences(category, subUsage, newRegion);
}
}
if (result != null) break;
}

View file

@ -7,7 +7,9 @@ import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.math.RoundingMode;
import java.util.Locale;
import com.ibm.icu.number.LocalizedNumberFormatter;
import org.junit.Test;
import com.ibm.icu.number.NumberFormatter;
@ -459,4 +461,46 @@ public class NumberSkeletonTest {
}
}
}
@Test
public void measurementSystemOverride() {
// NOTE TO REVIEWERS: When the appropriate changes are made on the CLDR side, do we want to keep this
// test or rely on additions the CLDR project makes to unitPreferencesTest.txt? --rtg 8/29/23
String[][] testCases = {
// Norway uses m/s for wind speed and should with or without the "ms-metric" subtag in the locale,
// but it uses km/h for other speeds. France uses km/h for all speeds. And in both places, if
// you say "ms-ussystem", you should get mph. In the US, we use mph for all speeds, but should
// use km/h if the locale has "ms-metric" in it.
{ "nn-NO", "unit/kilometer-per-hour usage/wind", "0,34 m/s" },
{ "nn-NO-u-ms-metric", "unit/kilometer-per-hour usage/wind", "0,34 m/s" },
{ "nn-NO-u-ms-ussystem", "unit/kilometer-per-hour usage/wind", "0,76 mile/t" },
{ "fr-FR", "unit/kilometer-per-hour usage/wind", "1,2\u202Fkm/h" },
{ "fr-FR-u-ms-metric", "unit/kilometer-per-hour usage/wind", "1,2\u202Fkm/h" },
{ "fr-FR-u-ms-ussystem", "unit/kilometer-per-hour usage/wind", "0,76\u202Fmi/h" },
{ "en-US", "unit/kilometer-per-hour usage/wind", "0.76 mph" },
{ "en-US-u-ms-metric", "unit/kilometer-per-hour usage/wind", "1.2 km/h" },
{ "en-US-u-ms-ussystem", "unit/kilometer-per-hour usage/wind", "0.76 mph" },
{ "nn-NO", "unit/kilometer-per-hour usage/default", "1,2 km/t" },
{ "nn-NO-u-ms-metric", "unit/kilometer-per-hour usage/default", "1,2 km/t" },
{ "nn-NO-u-ms-ussystem", "unit/kilometer-per-hour usage/default", "0,76 mile/t" },
{ "fr-FR", "unit/kilometer-per-hour usage/default", "1,2\u202Fkm/h" },
{ "fr-FR-u-ms-metric", "unit/kilometer-per-hour usage/default", "1,2\u202Fkm/h" },
{ "fr-FR-u-ms-ussystem", "unit/kilometer-per-hour usage/default", "0,76\u202Fmi/h" },
{ "en-US", "unit/kilometer-per-hour usage/default", "0.76 mph" },
{ "en-US-u-ms-metric", "unit/kilometer-per-hour usage/default", "1.2 km/h" },
{ "en-US-u-ms-ussystem", "unit/kilometer-per-hour usage/default", "0.76 mph" },
};
for (String[] testCase : testCases) {
String languageTag = testCase[0];
String skeleton = testCase[1];
String expectedResult = testCase[2];
LocalizedNumberFormatter nf = NumberFormatter.forSkeleton(skeleton).locale(Locale.forLanguageTag(languageTag));
String actualResult = nf.format(1.23).toString();
assertEquals("Wrong result: " + languageTag + ":" + skeleton, expectedResult, actualResult);
}
}
}