ICU-22362 Fix the name order derivation code in PersonNameFormatter to match the CLDR spec.

This commit is contained in:
Rich Gillam 2023-07-31 18:42:21 -07:00 committed by Rich Gillam
parent 6ba5a1a1b7
commit 89b952dffd
4 changed files with 98 additions and 21 deletions

View file

@ -1285,7 +1285,7 @@ public class ICUResourceBundle extends UResourceBundle {
return result;
}
private static String getParentLocaleID(String name, String origName, OpenType openType) {
public static String getParentLocaleID(String name, String origName, OpenType openType) {
// early out if the locale ID has a variant code or ends with _
if (name.endsWith("_") || !ULocale.getVariant(name).isEmpty()) {
int lastUnderbarPos = name.lastIndexOf('_');

View file

@ -98,11 +98,8 @@ public class PersonNameFormatterImpl {
/**
* THIS IS A DUMMY CONSTRUCTOR JUST FOR THE USE OF THE UNIT TESTS TO CHECK SOME OF THE INTERNAL IMPLEMENTATION!
*/
public PersonNameFormatterImpl(Locale locale, String[] patterns) {
public PersonNameFormatterImpl(Locale locale, String[] gnFirstPatterns, String[] snFirstPatterns, String[] gnFirstLocales, String[] snFirstLocales) {
// first, set dummy values for the other fields
snFirstPatterns = null;
gnFirstLocales = null;
snFirstLocales = null;
length = PersonNameFormatter.Length.MEDIUM;
usage = PersonNameFormatter.Usage.REFERRING;
formality = PersonNameFormatter.Formality.FORMAL;
@ -114,10 +111,22 @@ public class PersonNameFormatterImpl {
nativeSpaceReplacement = " ";
formatterLocaleUsesSpaces = true;
// then, set values for the fields we actually care about
// then, set values for the fields we actually care about (all but gnFirstPatterns are optional)
this.locale = locale;
gnFirstPatterns = PersonNamePattern.makePatterns(patterns, this);
this.gnFirstPatterns = PersonNamePattern.makePatterns(gnFirstPatterns, this);
this.snFirstPatterns = (snFirstPatterns != null) ? PersonNamePattern.makePatterns(snFirstPatterns, this) : null;
if (gnFirstLocales != null) {
this.gnFirstLocales = new HashSet<>();
Collections.addAll(this.gnFirstLocales, gnFirstLocales);
} else {
this.gnFirstLocales = null;
}
if (snFirstLocales != null) {
this.snFirstLocales = new HashSet<>();
Collections.addAll(this.snFirstLocales, snFirstLocales);
} else {
this.snFirstLocales = null;
}
}
@Override
@ -193,6 +202,8 @@ public class PersonNameFormatterImpl {
private final Set<String> LOCALES_THAT_DONT_USE_SPACES = new HashSet<>(Arrays.asList("ja", "zh", "yue", "km", "lo", "my"));
/**
 * Script codes that are valid for their locales but are not the default script. When the
 * name's locale carries one of these, the name-order lookup drops the script and lets
 * likely-subtags fill in the default so that the parent-chain search works.
 */
static final Set<String> NON_DEFAULT_SCRIPTS = new HashSet<>(Arrays.asList("Hani", "Hira", "Kana"));
/**
* Returns the value of the resource, as a string array.
* @param resource An ICUResourceBundle of type STRING or ARRAY. If ARRAY, this function just returns it
@ -223,23 +234,57 @@ public class PersonNameFormatterImpl {
return false;
}
String localeStr = getNameLocale(name).toString();
// Otherwise, search the gnFirstLocales and snFirstLocales for the locale's name.
// For our purposes, the "locale's name" is the locale the name itself gives us (if it
// has one), or the locale we guess for the name (if it doesn't).
Locale nameLocale = name.getNameLocale();
if (nameLocale == null) {
nameLocale = getNameLocale(name);
}
// this is a hack to deal with certain script codes that are valid, but not the default, for their locales--
// to make the parent-chain lookup work right, we need to replace any of those script codes (in the name's locale)
// with the appropriate default script for whatever language and region we have
ULocale nameULocale = ULocale.forLocale(nameLocale);
if (NON_DEFAULT_SCRIPTS.contains(nameULocale.getScript())) {
ULocale.Builder builder = new ULocale.Builder();
builder.setLocale(nameULocale);
builder.setScript(null);
nameULocale = ULocale.addLikelySubtags(builder.build());
}
// now search for the locale in the gnFirstLocales and snFirstLocales lists...
String localeStr = nameULocale.getName();
String origLocaleStr = localeStr;
String languageCode = nameULocale.getLanguage();
do {
// first check if the locale is in one of those lists
if (gnFirstLocales.contains(localeStr)) {
return true;
} else if (snFirstLocales.contains(localeStr)) {
return false;
}
int lastUnderbarPos = localeStr.lastIndexOf("_");
if (lastUnderbarPos >= 0) {
localeStr = localeStr.substring(0, lastUnderbarPos);
} else {
localeStr = "root";
// if not, try again with "und" in place of the language code (this lets us use "und_CN" to match
// all locales with a region code of "CN" and makes sure the last thing we try is always "und", which
// is required to be in gnFirstLocales or snFirstLocales)
String undStr = localeStr.replaceAll("^" + languageCode, "und");
if (gnFirstLocales.contains(undStr)) {
return true;
} else if (snFirstLocales.contains(undStr)) {
return false;
}
} while (!localeStr.equals("root"));
// should never get here-- "root" should always be in one of the locales
// if we haven't found the locale ID yet, look up its parent locale ID and try again-- if getParentLocaleID()
// returns null (i.e., we have a locale ID, such as "zh_Hant", that inherits directly from "root"), try again
// with just the locale ID's language code (this fixes it so that "zh_Hant" matches "zh", even though "zh" isn't,
// strictly speaking, its parent locale)
String parentLocaleStr = ICUResourceBundle.getParentLocaleID(localeStr, origLocaleStr, ICUResourceBundle.OpenType.LOCALE_DEFAULT_ROOT);
localeStr = (parentLocaleStr != null) ? parentLocaleStr : languageCode;
} while (localeStr != null);
// should never get here ("und" should always be in gnFirstLocales or snFirstLocales), but if we do...
return true;
}

View file

@ -319,8 +319,8 @@ public class PersonNameFormatter {
* @deprecated This API is for unit testing only.
*/
@Deprecated
public PersonNameFormatter(Locale locale, String[] patterns) {
this.impl = new PersonNameFormatterImpl(locale, patterns);
public PersonNameFormatter(Locale locale, String[] gnFirstPatterns, String[] snFirstPatterns, String[] gnFirstLocales, String[] snFirstLocales) {
this.impl = new PersonNameFormatterImpl(locale, gnFirstPatterns, snFirstPatterns, gnFirstLocales, snFirstLocales);
}
/**

View file

@ -440,7 +440,7 @@ public class PersonNameFormatterTest extends TestFmwk{
// a more extensive test of the literal text elision logic
PersonNameFormatter pnf = new PersonNameFormatter(Locale.US, new String[] {
"1{title}1 2{given}2 3{given2}3 4{surname}4 5{surname2}5 6{generation}6"
});
}, null, null, null);
String[][] testCases = new String[][] {
{ "locale=en_US,title=Dr.,given=Richard,given2=Theodore,surname=Gillam,surname2=Morgan,generation=III", "1Dr.1 2Richard2 3Theodore3 4Gillam4 5Morgan5 6III6" },
@ -467,7 +467,7 @@ public class PersonNameFormatterTest extends TestFmwk{
"A {title} {given} {given2} {surname} {surname2} {generation}",
"B {given} {given2} {surname} {surname2}",
"C {given} {surname}",
});
}, null, null, null);
String[][] testCases = new String[][] {
// { "locale=en_US,title=Dr.,given=Richard,given2=Theodore,surname=Gillam,surname2=Morgan,generation=III", "A Dr. Richard Theodore Gillam Morgan III" },
@ -502,7 +502,7 @@ public class PersonNameFormatterTest extends TestFmwk{
};
for (String[] testCase : testCases) {
PersonNameFormatter pnf = new PersonNameFormatter(new Locale("hu", "HU"), new String[] { testCase[0] } );
PersonNameFormatter pnf = new PersonNameFormatter(new Locale("hu", "HU"), new String[] { testCase[0] }, null, null, null );
String expectedResult = testCase[1];
String actualResult = pnf.formatToString(name);
@ -566,4 +566,36 @@ public class PersonNameFormatterTest extends TestFmwk{
}),
}, false);
}
/**
 * Checks that the formatter picks given-first or surname-first order based on the
 * locale attached to the name, using explicitly supplied gnFirstLocales and
 * snFirstLocales lists instead of real locale data.
 */
@Test
public void TestNameOrderFromLocale() {
    String[] givenFirstPatterns = { "{given} {surname}" };
    String[] surnameFirstPatterns = { "{surname} {given}" };
    String[] givenFirstLocales = { "und", "zh_Hant" };
    String[] surnameFirstLocales = { "zh", "und_CN", "und_SG" };

    PersonNameFormatter formatter = new PersonNameFormatter(
            Locale.US, givenFirstPatterns, surnameFirstPatterns, givenFirstLocales, surnameFirstLocales);

    // each row is { locale ID of the name, expected formatted result }
    String[][] expectations = {
        { "en",         "Given Sur" }, // should match "und"
        { "zh",         "Sur Given" }, // should match "zh"
        { "en_US",      "Given Sur" }, // should match "und"
        { "zh_CN",      "Sur Given" }, // should match "und_CN"
        { "zh_TW",      "Given Sur" }, // should match "zh_Hant"
        { "zh_Hans",    "Sur Given" }, // should match "zh"
        { "zh_Hant",    "Given Sur" }, // should match "zh_Hant"
        { "zh_Hant_CN", "Given Sur" }, // should match "zh_Hant", NOT "und_CN"
        { "en_CN",      "Sur Given" }, // should match "und_CN"
        { "de_DE",      "Given Sur" }, // should match "und"
    };

    for (int i = 0; i < expectations.length; i++) {
        String nameLocaleID = expectations[i][0];
        String expected = expectations[i][1];

        SimplePersonName personName = buildPersonName("given=Given,surname=Sur,locale=" + nameLocaleID);
        String formatted = formatter.formatToString(personName);

        assertEquals("Wrong result for " + nameLocaleID, expected, formatted);
    }
}
}