From 53775accd5c768a193da20bded704be47e915548 Mon Sep 17 00:00:00 2001 From: Rich Gillam <62772518+richgillam@users.noreply.github.com> Date: Fri, 12 Aug 2022 16:07:52 -0700 Subject: [PATCH] ICU-22081 PersonNameFormatter tech preview --- .../impl/personname/FieldModifierImpl.java | 156 ++++++++ .../personname/PersonNameFormatterImpl.java | 251 ++++++++++++ .../impl/personname/PersonNamePattern.java | 269 +++++++++++++ .../com/ibm/icu/text/PersonNameFormatter.java | 370 ++++++++++++++++++ .../com/ibm/icu/text/SimplePersonName.java | 163 ++++++++ .../test/format/PersonNameFormatterTest.java | 341 ++++++++++++++++ 6 files changed, 1550 insertions(+) create mode 100644 icu4j/main/classes/core/src/com/ibm/icu/impl/personname/FieldModifierImpl.java create mode 100644 icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNameFormatterImpl.java create mode 100644 icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNamePattern.java create mode 100644 icu4j/main/classes/core/src/com/ibm/icu/text/PersonNameFormatter.java create mode 100644 icu4j/main/classes/core/src/com/ibm/icu/text/SimplePersonName.java create mode 100644 icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PersonNameFormatterTest.java diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/FieldModifierImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/FieldModifierImpl.java new file mode 100644 index 00000000000..257704a7864 --- /dev/null +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/FieldModifierImpl.java @@ -0,0 +1,156 @@ +// © 2022 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +package com.ibm.icu.impl.personname; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.BreakIterator; +import com.ibm.icu.text.CaseMap; +import com.ibm.icu.text.PersonNameFormatter; +import com.ibm.icu.text.SimpleFormatter; +import com.ibm.icu.util.ULocale; + +import java.util.StringTokenizer; + +/** + * Parent class for classes that implement field-modifier behavior. + */ +abstract class FieldModifierImpl { + public abstract String modifyField(String fieldValue); + + public static FieldModifierImpl forName(PersonNameFormatter.FieldModifier modifierID, PersonNameFormatterImpl formatterImpl) { + switch (modifierID) { + case INFORMAL: + return NOOP_MODIFIER; + case PREFIX: + return NULL_MODIFIER; + case CORE: + return NOOP_MODIFIER; + case ALL_CAPS: + return new AllCapsModifier(formatterImpl.getLocale()); + case INITIAL_CAP: + return new InitialCapModifier(formatterImpl.getLocale()); + case INITIAL: + return new InitialModifier(formatterImpl.getInitialPattern(), formatterImpl.getInitialSequencePattern()); + case MONOGRAM: + return MONOGRAM_MODIFIER; + default: + throw new IllegalArgumentException("Invalid modifier ID " + modifierID); + } + } + + /** + * A field modifier that just returns the field value unmodified. This is used to implement the default + * behavior of the "informal" and "core" modifiers ("real" informal or core variants have to be supplied or + * calculated by the PersonName object). + */ + private static final FieldModifierImpl NOOP_MODIFIER = new FieldModifierImpl() { + @Override + public String modifyField(String fieldValue) { + return fieldValue; + } + }; + + /** + * A field modifier that just returns the empty string. This is used to implement the default behavior of the + * "prefix" modifier ("real" prefix variants have to be supplied or calculated by the PersonName object). 
+ */ + private static final FieldModifierImpl NULL_MODIFIER = new FieldModifierImpl() { + @Override + public String modifyField(String fieldValue) { + return ""; + } + }; + + /** + * A field modifier that returns the field value converted to ALL CAPS. This is the default behavior + * for the "allCaps" modifier. + */ + private static class AllCapsModifier extends FieldModifierImpl { + private final ULocale locale; + + public AllCapsModifier(ULocale locale) { + this.locale = locale; + } + + @Override + public String modifyField(String fieldValue) { + return UCharacter.toUpperCase(locale, fieldValue); + } + } + + /** + * A field modifier that returns the field value with the first letter of each word capitalized. This is + * the default behavior of the "initialCap" modifier. + */ + private static class InitialCapModifier extends FieldModifierImpl { + private final ULocale locale; + private static final CaseMap.Title TO_TITLE_WHOLE_STRING_NO_LOWERCASE = CaseMap.toTitle().wholeString().noLowercase(); + + public InitialCapModifier(ULocale locale) { + this.locale = locale; + } + + @Override + public String modifyField(String fieldValue) { + return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(locale.toLocale(), null, fieldValue); + } + } + + /** + * A field modifier that returns the field value converted into one or more initials. This is the first grapheme + * cluster of each word in the field value, modified using the initialPattern/initial resource value from the + * locale data, and strung together using the initialPattern/initialSequence resource value from the locale data. + * (In English, these patterns put periods after each initial and connect them with spaces.) + * This is the default behavior of the "initial" modifier. 
+ */ + private static class InitialModifier extends FieldModifierImpl { + private final SimpleFormatter initialFormatter; + private final SimpleFormatter initialSequenceFormatter; + + public InitialModifier(String initialPattern, String initialSequencePattern) { + this.initialFormatter = SimpleFormatter.compile(initialPattern); + this.initialSequenceFormatter = SimpleFormatter.compile(initialSequencePattern); + } + + @Override + public String modifyField(String fieldValue) { + String result = null; + StringTokenizer tok = new StringTokenizer(fieldValue, " "); + while (tok.hasMoreTokens()) { + String curInitial = getFirstGrapheme(tok.nextToken()); + if (result == null) { + result = initialFormatter.format(curInitial); + } else { + result = initialSequenceFormatter.format(result, initialFormatter.format(curInitial)); + } + } + return result; + } + } + + /** + * A field modifier that simply returns the first grapheme cluster in the field value. + * This is the default implementation of the "monogram" modifier. + */ + private static final FieldModifierImpl MONOGRAM_MODIFIER = new FieldModifierImpl() { + @Override + public String modifyField(String fieldValue) { + return getFirstGrapheme(fieldValue); + } + }; + + /** + * A utility function that just returns the first grapheme cluster in the string. 
+ */ + private static String getFirstGrapheme(String s) { + // early out if the string is empty to avoid StringIndexOutOfBoundsException + if (s.isEmpty()) { + return ""; + } + + // (currently, no locale overrides the grapheme-break rules, so we just use "root" instead of passing in the locale) + BreakIterator bi = BreakIterator.getCharacterInstance(ULocale.ROOT); + bi.setText(s); + return s.substring(0, bi.next()); + } +} diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNameFormatterImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNameFormatterImpl.java new file mode 100644 index 00000000000..877c6276341 --- /dev/null +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNameFormatterImpl.java @@ -0,0 +1,251 @@ +// © 2022 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package com.ibm.icu.impl.personname; + +import com.ibm.icu.impl.ICUData; +import com.ibm.icu.impl.ICUResourceBundle; +import com.ibm.icu.lang.UScript; +import com.ibm.icu.text.FormattedValue; +import com.ibm.icu.text.PersonNameFormatter; +import com.ibm.icu.util.ULocale; +import com.ibm.icu.util.UResourceBundle; + +import java.util.*; + +import static com.ibm.icu.util.UResourceBundle.ARRAY; +import static com.ibm.icu.util.UResourceBundle.STRING; + +/** + * Actual implementation class for PersonNameFormatter. 
+ */ +public class PersonNameFormatterImpl { + private final ULocale locale; + private final PersonNamePattern[] gnFirstPatterns; + private final PersonNamePattern[] snFirstPatterns; + private final Set gnFirstLocales; + private final Set snFirstLocales; + private final String initialPattern; + private final String initialSequencePattern; + private final boolean capitalizeSurname; + private final String foreignSpaceReplacement; + private final boolean formatterLocaleUsesSpaces; + private final PersonNameFormatter.Length length; + private final PersonNameFormatter.Usage usage; + private final PersonNameFormatter.Formality formality; + private final Set options; + + public PersonNameFormatterImpl(ULocale locale, + PersonNameFormatter.Length length, + PersonNameFormatter.Usage usage, + PersonNameFormatter.Formality formality, + Set options) { + // null for `options` is the same as the empty set + if (options == null) { + options = new HashSet<>(); + } + + // save off our creation parameters (these are only used if we have to create a second formatter) + this.length = length; + this.usage = usage; + this.formality = formality; + this.options = options; + + // load simple property values from the resource bundle (or the options set) + ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME, locale); + this.locale = locale; + this.initialPattern = rb.getStringWithFallback("personNames/initialPattern/initial"); + this.initialSequencePattern = rb.getStringWithFallback("personNames/initialPattern/initialSequence"); + this.capitalizeSurname = options.contains(PersonNameFormatter.Options.SURNAME_ALLCAPS); + this.foreignSpaceReplacement = rb.getStringWithFallback("personNames/foreignSpaceReplacement"); + this.formatterLocaleUsesSpaces = !LOCALES_THAT_DONT_USE_SPACES.contains(locale.getLanguage()); + + // adjust for combinations of parameters that don't make sense in practice + if (usage == PersonNameFormatter.Usage.MONOGRAM) { + // we 
don't support SORTING in conjunction with MONOGRAM; if the caller passes in SORTING, remove it from + // the options list + options.remove(PersonNameFormatter.Options.SORTING); + } else if (options.contains(PersonNameFormatter.Options.SORTING)) { + // we only support SORTING in conjunction with REFERRING; if the caller passes in ADDRESSING, treat it + // the same as REFERRING + usage = PersonNameFormatter.Usage.REFERRING; + } + + // load the actual formatting patterns-- since we don't know the name order until formatting time (it can be + // different for different names), load patterns for both GN-first and SN-first names. (If the user has + // specified SORTING, we don't need to do this-- we just load the "sorting" patterns and ignore the name's order.) + final String RESOURCE_PATH_PREFIX = "personNames/namePattern/"; + String resourceNameBody = length.toString().toLowerCase() + "-" + usage.toString().toLowerCase() + "-" + + formality.toString().toLowerCase(); + if (!options.contains(PersonNameFormatter.Options.SORTING)) { + ICUResourceBundle gnFirstResource = rb.getWithFallback(RESOURCE_PATH_PREFIX + "givenFirst-" + resourceNameBody); + ICUResourceBundle snFirstResource = rb.getWithFallback(RESOURCE_PATH_PREFIX + "surnameFirst-" + resourceNameBody); + + gnFirstPatterns = PersonNamePattern.makePatterns(asStringArray(gnFirstResource), this); + snFirstPatterns = PersonNamePattern.makePatterns(asStringArray(snFirstResource), this); + + gnFirstLocales = new HashSet<>(); + Collections.addAll(gnFirstLocales, asStringArray(rb.getWithFallback("personNames/nameOrderLocales/givenFirst"))); + snFirstLocales = new HashSet<>(); + Collections.addAll(snFirstLocales, asStringArray(rb.getWithFallback("personNames/nameOrderLocales/surnameFirst"))); + } else { + ICUResourceBundle patternResource = rb.getWithFallback(RESOURCE_PATH_PREFIX + "sorting-" + resourceNameBody); + + gnFirstPatterns = PersonNamePattern.makePatterns(asStringArray(patternResource), this); + snFirstPatterns = 
null; + gnFirstLocales = null; + snFirstLocales = null; + } + } + + public String format(PersonNameFormatter.PersonName name) { + // TODO: Should probably return a FormattedPersonName object + + // if the formatter is for a language that doesn't use spaces between words and the name is from a language + // that does, create a formatter for the NAME'S locale and use THAT to format the name + ULocale nameLocale = name.getNameLocale(); + boolean nameLocaleUsesSpaces = !LOCALES_THAT_DONT_USE_SPACES.contains(nameLocale.getLanguage()); + if (!formatterLocaleUsesSpaces && nameLocaleUsesSpaces) { + PersonNameFormatterImpl nativeFormatter = new PersonNameFormatterImpl(nameLocale, this.length, + this.usage, this.formality, this.options); + String result = nativeFormatter.format(name); + + // BUT, if the name is actually written in the formatter locale's script, replace any spaces in the name + // with the foreignSpaceReplacement character + if (!foreignSpaceReplacement.equals(" ") && scriptMatchesLocale(result, this.locale)) { + result = result.replace(" ", this.foreignSpaceReplacement); + } + return result; + } + + // if we get down to here, we're just doing normal formatting-- if we have both GN-first and SN-first rules, + // choose which one to use based on the name's locale and preferred field order + if (snFirstPatterns == null || nameIsGnFirst(name)) { + return getBestPattern(gnFirstPatterns, name).format(name); + } else { + return getBestPattern(snFirstPatterns, name).format(name); + } + } + + public ULocale getLocale() { + return locale; + } + + public String getInitialPattern() { + return initialPattern; + } + + public String getInitialSequencePattern() { + return initialSequencePattern; + } + + public boolean shouldCapitalizeSurname() { + return capitalizeSurname; + } + + private final Set LOCALES_THAT_DONT_USE_SPACES = new HashSet<>(Arrays.asList("ja", "zh", "th", "yue")); + + /** + * Returns the value of the resource, as a string array. 
+ * @param resource An ICUResourceBundle of type STRING or ARRAY. If ARRAY, this function just returns it + * as a string array. If STRING, it returns a one-element array containing that string. + * @return The resource's value, as an array of Strings. + */ + private String[] asStringArray(ICUResourceBundle resource) { + if (resource.getType() == STRING) { + return new String[] { resource.getString() }; + } else if (resource.getType() == ARRAY){ + return resource.getStringArray(); + } else { + throw new IllegalStateException("Unsupported resource type " + resource.getType()); + } + } + + /** + * Returns the field order to use when formatting this name, taking into account the name's preferredOrder + * field, as well as the name and formatter's respective locales. + * @param name The name to be formatted. + * @return If true, use GN-first order to format the name; if false, use SN-first order. + */ + private boolean nameIsGnFirst(PersonNameFormatter.PersonName name) { + // the name can declare its order-- check that first (it overrides any locale-based calculation) + Set modifiers = new HashSet<>(); + String preferredOrder = name.getFieldValue(PersonNameFormatter.NameField.PREFERRED_ORDER, modifiers); + if (preferredOrder != null) { + if (preferredOrder.equals("givenFirst")) { + return true; + } else if (preferredOrder.equals("surnameFirst")) { + return false; + } else { + throw new IllegalArgumentException("Illegal preferredOrder value " + preferredOrder); + } + } + + String localeStr = name.getNameLocale().toString(); + do { + if (gnFirstLocales.contains(localeStr)) { + return true; + } else if (snFirstLocales.contains(localeStr)) { + return false; + } + + int lastUnderbarPos = localeStr.lastIndexOf("_"); + if (lastUnderbarPos >= 0) { + localeStr = localeStr.substring(0, lastUnderbarPos); + } else { + localeStr = "root"; + } + } while (!localeStr.equals("root")); + + // should never get here-- "root" should always be in one of the locales + return true; + } + + 
private PersonNamePattern getBestPattern(PersonNamePattern[] patterns, PersonNameFormatter.PersonName name) { + // early out if there's only one pattern + if (patterns.length == 1) { + return patterns[0]; + } else { + // if there's more than one pattern, return the one that contains the greatest number of fields that + // actually have values in `name`. If there's a tie, return the pattern that contains the lowest number + // of fields that DON'T have values in `name`. + int maxPopulatedFields = 0; + int minEmptyFields = Integer.MAX_VALUE; + PersonNamePattern bestPattern = null; + + for (PersonNamePattern pattern : patterns) { + int populatedFields = pattern.numPopulatedFields(name); + int emptyFields = pattern.numEmptyFields(name); + if (populatedFields > maxPopulatedFields) { + maxPopulatedFields = populatedFields; + minEmptyFields = emptyFields; + bestPattern = pattern; + } else if (populatedFields == maxPopulatedFields && emptyFields < minEmptyFields) { + minEmptyFields = emptyFields; + bestPattern = pattern; + } + } + return bestPattern; + } + } + + /** + * Returns true if the script of `s` is one of the default scripts for `locale`. + * This function only checks the script of the first character whose script isn't "common," + * so it probably won't work right on mixed-script strings. 
+ */ + private boolean scriptMatchesLocale(String s, ULocale locale) { + int[] localeScripts = UScript.getCode(locale); + int stringScript = UScript.COMMON; + for (int i = 0; stringScript == UScript.COMMON && i < s.length(); i++) { + char c = s.charAt(i); + stringScript = UScript.getScript(c); + } + + for (int localeScript : localeScripts) { + if (localeScript == stringScript) { + return true; + } + } + return false; + } +} diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNamePattern.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNamePattern.java new file mode 100644 index 00000000000..c7d547f10cf --- /dev/null +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNamePattern.java @@ -0,0 +1,269 @@ +// © 2022 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package com.ibm.icu.impl.personname; + +import com.ibm.icu.text.PersonNameFormatter; + +import java.util.*; + +/** + * A single name formatting pattern, corresponding to a single namePattern element in CLDR. 
+ */ +class PersonNamePattern { + private String patternText; // for debugging + private Element[] patternElements; + + public static PersonNamePattern[] makePatterns(String[] patternText, PersonNameFormatterImpl formatterImpl) { + PersonNamePattern[] result = new PersonNamePattern[patternText.length]; + for (int i = 0; i < patternText.length; i++) { + result[i] = new PersonNamePattern(patternText[i], formatterImpl); + } + return result; + } + + private PersonNamePattern(String patternText, PersonNameFormatterImpl formatterImpl) { + this.patternText = patternText; + + List elements = new ArrayList<>(); + boolean inField = false; + boolean inEscape = false; + StringBuilder workingString = new StringBuilder(); + for (int i = 0; i < patternText.length(); i++) { + char c = patternText.charAt(i); + + if (inEscape) { + workingString.append(c); + inEscape = false; + } else { + switch (c) { + case '\\': + inEscape = true; + break; + case '{': + if (!inField) { + if (workingString.length() > 0) { + elements.add(new LiteralText(workingString.toString())); + workingString = new StringBuilder(); + } + inField = true; + } else { + throw new IllegalArgumentException("Nested braces are not allowed in name patterns"); + } + break; + case '}': + if (inField) { + if (workingString.length() > 0) { + elements.add(new NameFieldImpl(workingString.toString(), formatterImpl)); + workingString = new StringBuilder(); + } else { + throw new IllegalArgumentException("No field name inside braces"); + } + inField = false; + } else { + throw new IllegalArgumentException("Unmatched closing brace in literal text"); + } + break; + default: + workingString.append(c); + } + } + } + if (workingString.length() > 0) { + elements.add(new LiteralText(workingString.toString())); + } + this.patternElements = elements.toArray(new Element[0]); + } + + public String format(PersonNameFormatter.PersonName name) { + StringBuilder result = new StringBuilder(); + boolean seenLeadingField = false; + boolean 
seenEmptyLeadingField = false; + boolean seenEmptyField = false; + StringBuilder textBefore = new StringBuilder(); + StringBuilder textAfter = new StringBuilder(); + + // the logic below attempts to implement the following algorithm: + // - If one or more fields at the beginning of the name are empty, also skip all literal text + // from the beginning of the name up to the first populated field. + // - If one or more fields at the end of the name are empty, also skip all literal text from + // the last populated field to the end of the name. + // - If one or more contiguous fields in the middle of the name are empty, skip the literal text + // between them, omit characters from the literal text on either side of the empty fields up to + // the first space on either side, and make sure that the resulting literal text doesn't end up + // with two spaces in a row. + for (Element element : patternElements) { + if (element.isLiteral()) { + if (seenEmptyLeadingField) { + // do nothing; throw away the literal text + } else if (seenEmptyField) { + textAfter.append(element.format(name)); + } else { + textBefore.append(element.format(name)); + } + } else { + String fieldText = element.format(name); + if (fieldText == null || fieldText.isEmpty()) { + if (!seenLeadingField) { + seenEmptyLeadingField = true; + textBefore.setLength(0); + } else { + seenEmptyField = true; + textAfter.setLength(0); + } + } else { + seenLeadingField = true; + seenEmptyLeadingField = false; + if (seenEmptyField) { + result.append(coalesce(textBefore, textAfter)); + result.append(fieldText); + seenEmptyField = false; + } else { + result.append(textBefore); + textBefore.setLength(0); + result.append(element.format(name)); + } + } + } + } + if (!seenEmptyField) { + result.append(textBefore); + } + return result.toString(); + } + + public int numPopulatedFields(PersonNameFormatter.PersonName name) { + int result = 0; + for (Element element : patternElements) { + result += element.isPopulated(name) ? 
1 : 0; + } + return result; + } + + public int numEmptyFields(PersonNameFormatter.PersonName name) { + int result = 0; + for (Element element : patternElements) { + result += element.isPopulated(name) ? 0 : 1; + } + return result; + } + + /** + * Stitches together the literal text on either side of an omitted field by deleting any + * non-whitespace characters immediately neighboring the omitted field and coalescing any + * adjacent spaces at the join point down to one. + * @param s1 The literal text before the omitted field. + * @param s2 The literal text after the omitted field. + */ + private String coalesce(StringBuilder s1, StringBuilder s2) { + // get the range of non-whitespace characters at the beginning of s1 + int p1 = 0; + while (p1 < s1.length() && !Character.isWhitespace(s1.charAt(p1))) { + ++p1; + } + + // get the range of non-whitespace characters at the end of s2 + int p2 = s2.length() - 1; + while (p2 >= 0 && !Character.isWhitespace(s2.charAt(p2))) { + --p2; + } + + // also include one whitespace character from s1 or, if there aren't + // any, one whitespace character from s2 + if (p1 < s1.length()) { + ++p1; + } else if (p2 >= 0) { + --p2; + } + + // concatenate those two ranges to get the coalesced literal text + String result = s1.substring(0, p1) + s2.substring(p2 + 1); + + // clear out s1 and s2 (done here to improve readability in format() above) + s1.setLength(0); + s2.setLength(0); + + return result; + } + + /** + * A single element in a NamePattern. This is either a name field or a range of literal text. + */ + private interface Element { + boolean isLiteral(); + String format(PersonNameFormatter.PersonName name); + boolean isPopulated(PersonNameFormatter.PersonName name); + } + + /** + * Literal text from a name pattern. 
+ */ + private static class LiteralText implements Element { + private String text; + + public LiteralText(String text) { + this.text = text; + } + + public boolean isLiteral() { + return true; + } + + public String format(PersonNameFormatter.PersonName name) { + return text; + } + + public boolean isPopulated(PersonNameFormatter.PersonName name) { + return false; + } + } + + /** + * An actual name field in a NamePattern (i.e., the stuff represented in the pattern by text + * in braces). This class actually handles fetching the value for the field out of a + * PersonName object and applying any modifiers to it. + */ + private static class NameFieldImpl implements Element { + private PersonNameFormatter.NameField fieldID; + private Map modifiers; + + public NameFieldImpl(String fieldNameAndModifiers, PersonNameFormatterImpl formatterImpl) { + List modifierIDs = new ArrayList<>(); + StringTokenizer tok = new StringTokenizer(fieldNameAndModifiers, "-"); + + this.fieldID = PersonNameFormatter.NameField.forString(tok.nextToken()); + while (tok.hasMoreTokens()) { + modifierIDs.add(PersonNameFormatter.FieldModifier.forString(tok.nextToken())); + } + if (this.fieldID == PersonNameFormatter.NameField.SURNAME && formatterImpl.shouldCapitalizeSurname()) { + modifierIDs.add(PersonNameFormatter.FieldModifier.ALL_CAPS); + } + + this.modifiers = new HashMap<>(); + for (PersonNameFormatter.FieldModifier modifierID : modifierIDs) { + this.modifiers.put(modifierID, FieldModifierImpl.forName(modifierID, formatterImpl)); + } + } + + public boolean isLiteral() { + return false; + } + + public String format(PersonNameFormatter.PersonName name) { + Set modifierIDs = new HashSet<>(modifiers.keySet()); + String result = name.getFieldValue(fieldID, modifierIDs); + if (result != null) { + for (PersonNameFormatter.FieldModifier modifierID : modifierIDs) { + result = modifiers.get(modifierID).modifyField(result); + } + } + return result; + } + + public boolean 
isPopulated(PersonNameFormatter.PersonName name) { + // just check whether the unmodified field contains a value + Set modifierIDs = new HashSet<>(); + String fieldValue = name.getFieldValue(fieldID, modifierIDs); + return fieldValue != null && !fieldValue.isEmpty(); + } + } +} diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/PersonNameFormatter.java b/icu4j/main/classes/core/src/com/ibm/icu/text/PersonNameFormatter.java new file mode 100644 index 00000000000..160d32c97a1 --- /dev/null +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/PersonNameFormatter.java @@ -0,0 +1,370 @@ +// © 2022 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package com.ibm.icu.text; + +import com.ibm.icu.impl.personname.PersonNameFormatterImpl; +import com.ibm.icu.util.ULocale; + +import java.util.Set; + +/** + * A class for formatting names of people. Takes raw name data for a person and renders it into a string according to + * the caller's specifications, taking into account how people's names are rendered in the caller's locale. + * + * The Length, Usage, and Formality options can be used to get a wide variety of results. In English, they would + * produce results along these lines: + * + * | | REFERRING | REFERRING | ADDRESSING | ADDRESSING | MONOGRAM | MONOGRAM | + * | | FORMAL | INFORMAL | FORMAL | INFORMAL | FORMAL | INFORMAL | + * |--------|-----------------------|--------------|------------|------------|----------|----------| + * | LONG | James Earl Carter Jr. | Jimmy Carter | Mr. Carter | Jimmy | JEC | JC | + * | MEDIUM | James E. Carter Jr. | Jimmy Carter | Mr. Carter | Jimmy | C | J | + * | SHORT | J. E. Carter | Jimmy Carter | Mr. Carter | Jimmy | C | J | + * + * @internal + */ +public class PersonNameFormatter { + //============================================================================== + // Parameters that control formatting behavior + + /** + * Specifies the desired length of the formatted name. 
+ * @internal + */ + public enum Length { + /** + * The longest name length. Generally uses most of the fields in the name object. + * @internal + */ + LONG, + + /** + * The most typical name length. Generally includes the given name and surname, but generally + * not most of the other fields. + * @internal + */ + MEDIUM, + + /** + * A shortened name. Skips most fields and may abbreviate some name fields to just their initials. + * When Formality is INFORMAL, may only include one field. + */ + SHORT + } + + /** + * Specifies the intended usage of the formatted name. + * @internal + */ + public enum Usage { + /** + * Used for when the name is going to be used to address the user directly: "Turn left here, John." + * @internal + */ + ADDRESSING, + + /** + * Used in general cases, when the name is used to refer to somebody else. + * @internal + */ + REFERRING, + + /** + * Used to generate monograms, short 1 to 3-character versions of the name suitable for use in things + * like chat avatars. In English, this is usually the person's initials, but this isn't true in all + * languages. When the caller specifies Usage.MONOGRAM, the Length parameter can be used to get different + * lengths of monograms: Length.SHORT is generally a single letter; Length.LONG may be as many as three or four. + * @internal + */ + MONOGRAM + } + + /** + * Specifies the intended formality of the formatted name. + * @internal + */ + public enum Formality { + /** + * The more formal version of the name. + * @internal + */ + FORMAL, + + /** + * The more informal version of the name. In English, this might omit fields or use the "informal" variant + * of the given name. + * @internal + */ + INFORMAL + } + + /** + * Additional options to customize the behavior of the formatter. + * @internal + */ + public enum Options { + /** + * Causes the formatter to generate results suitable for inclusion in a sorted list. 
For GN-first languages, + * this generally means moving the surname to the beginning of the string, with a comma between it and + * the rest of the name: e.g., "Carter, James E. Jr.". + * @internal + */ + SORTING, + + /** + * Requests that the surname in the formatted result be rendered in ALL CAPS. This is often done with + * Japanese names to highlight which name is the surname. + * @internal + */ + SURNAME_ALLCAPS + } + + //============================================================================== + // Identifiers used to request field values from the PersonName object + + /** + * Identifiers for the name fields supported by the PersonName object. + * @internal + */ + public enum NameField { + /** + * Contains titles and other words that precede the actual name, such as "Mr." + * @internal + */ + PREFIX("prefix"), + + /** + * The given name. May contain more than one token. + * @internal + */ + GIVEN("given"), + + /** + * Additional given names. (In English, this is usually the "middle name" and + * may contain more than one word.) + * @internal + */ + GIVEN2("given2"), + + /** + * The surname. In Spanish, this is the patronymic surname. + * @internal + */ + SURNAME("surname"), + + /** + * Additional surnames. This is only used in a few languages, such as Spanish, + * where it is the matronymic surname. (In most languages, multiple surnames all + * just go in the SURNAME field.) + * @internal + */ + SURNAME2("surname2"), + + /** + * Generational and professional qualifiers that generally follow the actual name, + * such as "Jr." or "M.D." + * @internal + */ + SUFFIX("suffix"), + + /** + * The preferred field order for the name. PersonName objects generally shouldn't provide + * this field, allowing the PersonNameFormatter to deduce the proper field order based on + * the locales of the name and the formatter. But this can be used to force a particular + * field order, generally in cases where the deduction logic in PersonNameFormatter would + * guess wrong. 
When used, the only valid values are "givenFirst" and "surnameFirst". + * @internal + */ + PREFERRED_ORDER("preferredOrder"); + + private final String name; + + private NameField(String name) { + this.name = name; + } + + /** + * Returns the NameField's display name. + * @internal + */ + @Override + public String toString() { + return name; + } + + /** + * Returns the appropriate NameField for its display name. + * @internal + */ + public static NameField forString(String name) { + for (NameField field : values()) { + if (field.name.equals(name)) { + return field; + } + } + throw new IllegalArgumentException("Invalid field name " + name); + } + } + + /** + * Identifiers for the name field modifiers supported by the PersonName and PersonNameFormatter objects. + * @internal + */ + public enum FieldModifier { + /** + * Requests an "informal" variant of the field, generally a nickname of some type: + * if "given" is "James", "given-informal" might be "Jimmy". Only applied to the "given" + * field. If the PersonName object doesn't apply this modifier, PersonNameFormatter just + * uses the unmodified version of "given". + * @internal + */ + INFORMAL("informal"), + + /** + * If the field contains a main word with one or more separate prefixes, such as + * "van den Hul", this requests just the prefixes ("van den"). Only applied to the "surname" + * field. If the PersonName object doesn't apply this modifier, PersonNameFormatter + * assumes there are no prefixes. + * @internal + */ + PREFIX("prefix"), + + /** + * If the field contains a main word with one or more separate prefixes, such as + * "van den Hul", this requests just the main word ("Hul"). Only applied to the "surname" + * field. If the implementing class doesn't apply this modifier, PersonNameFormatter + * assumes the entire "surname" field is the "core". + * @internal + */ + CORE("core"), + + /** + * Requests an initial for the specified field. 
PersonNameFormatter will do + * this algorithmically, but a PersonName object can apply this modifier itself if it wants + * different initial-generation logic (or stores the initial separately). + * @internal + */ + INITIAL("initial"), + + /** + * Requests an initial for the specified field, suitable for use in a monogram + * (this usually differs from "initial" in that "initial" adds a period and "monogram" doesn't). + * PersonNameFormatter will do this algorithmically, but a PersonName object can apply + * this modifier itself if it wants different monogram-generation logic. + * @internal + */ + MONOGRAM("monogram"), + + /** + * Requests the field value converted to ALL CAPS. PersonName objects + * generally won't need to handle this modifier themselves. + * @internal + */ + ALL_CAPS("allCaps"), + + /** + * Requests the field value with the first letter of each word capitalized. + * A PersonName object might handle this modifier itself to capitalize words more + * selectively. + * @internal + */ + INITIAL_CAP("initialCap"); + + private final String name; + + private FieldModifier(String name) { + this.name = name; + } + + /** + * Returns the FieldModifier's display name. + * @internal + */ + @Override + public String toString() { + return name; + } + + /** + * Returns the appropriate fieldModifier for its display name. + * @internal + */ + public static FieldModifier forString(String name) { + for (FieldModifier modifier : values()) { + if (modifier.name.equals(name)) { + return modifier; + } + } + throw new IllegalArgumentException("Invalid modifier name " + name); + } + } + + //============================================================================== + // The PersonName object + + /** + * An object used to provide name data to the PersonNameFormatter for formatting. 
+ * Clients can implement this interface to talk directly to some other subsystem + * that actually contains the name data (instead of having to copy it into a separate + * object just for formatting) or to override the default modifier behavior described + * above. A concrete SimplePersonName object that does store the field values directly + * is provided. + * @internal + * @see SimplePersonName + */ + public interface PersonName { + /** + * Returns the locale of the name-- that is, the language or country of origin for the person being named. + * @return The name's locale. + * @internal + */ + public ULocale getNameLocale(); + + /** + * Returns one field of the name, possibly in a modified form. + * @param identifier The identifier of the requested field. + * @param modifiers An **IN/OUT** parameter that specifies modifiers to apply to the basic field value. + * An implementing class can choose to handle or ignore any modifiers; it should modify + * this parameter so that on exit, it contains only the requested modifiers that it + * DIDN'T handle. + * @return The value of the requested field, optionally modified by some or all of the requested modifiers, or + * null if the requested field isn't present in the name. + * @internal + */ + public String getFieldValue(NameField identifier, Set modifiers); + } + + private final PersonNameFormatterImpl impl; + + //============================================================================== + // Public API on PersonNameFormatter + + /** + * Constructs a PersonNameFormatter. + * @param locale The target locale for formatted names. + * @param length The requested length. + * @param usage The requested usage. + * @param formality The requested formality. + * @param options A set containing additional formatting options. May be null. 
+ * @see Length + * @see Usage + * @see Formality + * @see Options + * @internal + */ + public PersonNameFormatter(ULocale locale, Length length, Usage usage, Formality formality, Set options) { + this.impl = new PersonNameFormatterImpl(locale, length, usage, formality, options); + } + + /** + * Formats a name. + * @param name A PersonName object that supplies individual field values (optionally, with modifiers applied) + * to the formatter for formatting. + * @return The name, formatted according to the locale and other parameters passed to the formatter's constructor. + * @internal + */ + public String format(PersonName name) { + // TODO: Should probably return a FormattedPersonName object + return impl.format(name); + } +} diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/SimplePersonName.java b/icu4j/main/classes/core/src/com/ibm/icu/text/SimplePersonName.java new file mode 100644 index 00000000000..5ebea7a2432 --- /dev/null +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/SimplePersonName.java @@ -0,0 +1,163 @@ +// © 2022 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package com.ibm.icu.text; + +import com.ibm.icu.util.ULocale; + +import java.util.*; + +/** + * A concrete implementation of PersonNameFormatter.PersonName that simply stores the field + * values in a Map. + * + * A caller can store both raw field values (such as "given") and modified field values (such as "given-informal") + * in a SimplePersonName. But beyond storing and returning modified field values provided to it by the caller, + * SimplePersonName relies on the PersonNameFormatter's default handling of field modifiers. + * @internal + */ +public class SimplePersonName implements PersonNameFormatter.PersonName { + /** + * Simple constructor. + * @param nameLocale The locale of the name (i.e., its ethnic or national origin). + * @param fieldValues A Map mapping from field names to field values. 
The field names + * are the values returned by NameField.toString(). + * @internal + */ + public SimplePersonName(ULocale nameLocale, Map fieldValues) { + this.nameLocale = nameLocale; + this.fieldValues = new HashMap<>(fieldValues); + } + + /** + * A constructor that takes the locale ID and field values as a single String. This constructor is really + * intended only for the use of the PersonNameFormatter unit tests. + * @param keysAndValues A single string containing the locale ID and field values. This string is organized + * into key-value pairs separated by commas. The keys are separated from the values + * by equal signs. The keys themselves are field names, as returned by + * NameField.toString(), optionally followed by a hyphen-delimited set of modifier names, + * as returned by FieldModifier.toString(). + * @internal + */ + public SimplePersonName(String keysAndValues) { + this.fieldValues = new HashMap<>(); + + StringTokenizer tok = new StringTokenizer(keysAndValues, ","); + ULocale tempLocale = null; + while (tok.hasMoreTokens()) { + String entry = tok.nextToken(); + int equalPos = entry.indexOf('='); + if (equalPos < 0) { + throw new IllegalArgumentException("No = found in name field entry"); + } + String fieldName = entry.substring(0, equalPos); + String fieldValue = entry.substring(equalPos + 1); + + if (fieldName.equals("locale")) { + tempLocale = new ULocale(fieldValue); + } else { + this.fieldValues.put(fieldName, fieldValue); + } + } + this.nameLocale = tempLocale; + + // special-case code for the "surname" field-- if it isn't specified, but "surname-prefix" and + // "surname-core" both are, let "surname" be the other two fields joined with a space + if (this.fieldValues.get("surname") == null) { + String surnamePrefix = this.fieldValues.get("surname-prefix"); + String surnameCore = this.fieldValues.get("surname-core"); + if (surnamePrefix != null && surnameCore != null) { + this.fieldValues.put("surname", surnamePrefix + " " + surnameCore); + } + 
} + } + + /** + * Returns the locale of the name-- that is, the language or country of origin for the person being named. + * @return The name's locale. + * @internal + */ + @Override + public ULocale getNameLocale() { + return nameLocale; + } + + /** + * Returns one field of the name, possibly in a modified form. This class can store modified versions of fields, + * provided at construction time, and this function will return them. Otherwise, it ignores modifiers and + * relies on PersonNameFormat's default modifier handling. + * @param nameField The identifier of the requested field. + * @param modifiers An **IN/OUT** parameter that specifies modifiers to apply to the basic field value. + * On return, this list will contain any modifiers that this object didn't handle. This class + * will always return this set unmodified, unless a modified version of the requested field + * was provided at construction time. + * @return The value of the requested field, optionally modified by some or all of the requested modifiers, or + * null if the requested field isn't present in the name. + * @internal + */ + @Override + public String getFieldValue(PersonNameFormatter.NameField nameField, Set modifiers) { + // first look for the fully modified name in the internal table + String fieldName = nameField.toString(); + String result = fieldValues.get(makeModifiedFieldName(nameField, modifiers)); + if (result != null) { + modifiers.clear(); + return result; + } + + // if we don't find it, check the fully unmodified name. 
If it's not there, nothing else will be + result = fieldValues.get(fieldName); + if (result == null) { + return null; + } else if (modifiers.size() == 1) { + // and if it IS there and there's only one modifier, we're done + return result; + } + + // but if there are two or more modifiers, then we have to go through the whole list of fields and look for the best match + String winningKey = fieldName; + int winningScore = 0; + for (String key : fieldValues.keySet()) { + if (key.startsWith(fieldName)) { + Set keyModifiers = makeModifiersFromName(key); + if (modifiers.containsAll(keyModifiers)) { + if (keyModifiers.size() > winningScore || (keyModifiers.size() == winningScore && key.compareTo(winningKey) < 0)) { + winningKey = key; + winningScore = keyModifiers.size(); + } + } + } + } + result = fieldValues.get(winningKey); + modifiers.removeAll(makeModifiersFromName(winningKey)); + return result; + } + + private static String makeModifiedFieldName(PersonNameFormatter.NameField fieldName, + Collection modifiers) { + StringBuilder result = new StringBuilder(); + result.append(fieldName); + + TreeSet sortedModifierNames = new TreeSet<>(); + for (PersonNameFormatter.FieldModifier modifier : modifiers) { + sortedModifierNames.add(modifier.toString()); + } + for (String modifierName : sortedModifierNames) { + result.append("-"); + result.append(modifierName); + } + return result.toString(); + } + + private static Set makeModifiersFromName(String modifiedName) { + StringTokenizer tok = new StringTokenizer(modifiedName, "-"); + Set result = new HashSet<>(); + String fieldName = tok.nextToken(); // throw away the field name + while (tok.hasMoreTokens()) { + result.add(PersonNameFormatter.FieldModifier.forString(tok.nextToken())); + } + return result; + } + + private final ULocale nameLocale; + private final Map fieldValues; +} \ No newline at end of file diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PersonNameFormatterTest.java 
b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PersonNameFormatterTest.java new file mode 100644 index 00000000000..1046d19e27b --- /dev/null +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PersonNameFormatterTest.java @@ -0,0 +1,341 @@ +// © 2022 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package com.ibm.icu.dev.test.format; + +import com.ibm.icu.dev.test.TestFmwk; +import com.ibm.icu.text.PersonNameFormatter; +import com.ibm.icu.text.SimplePersonName; +import com.ibm.icu.util.ULocale; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.*; + +@RunWith(JUnit4.class) +public class PersonNameFormatterTest extends TestFmwk{ + private static class NameAndTestCases { + public String nameFields; + public String[][] testCases; + + public NameAndTestCases(String nameFields, String[][] testCases) { + this.nameFields = nameFields; + this.testCases = testCases; + } + } + + private void executeTestCases(NameAndTestCases[] namesAndTestCases, boolean forDebugging) { + for (NameAndTestCases nameAndTestCases : namesAndTestCases) { + SimplePersonName name = new SimplePersonName(nameAndTestCases.nameFields); + if (forDebugging) { + System.out.println(nameAndTestCases.nameFields); + } + + for (String[] testCase : nameAndTestCases.testCases) { + ULocale formatterLocale = new ULocale(testCase[0]); + PersonNameFormatter.Length formatterLength = PersonNameFormatter.Length.valueOf(testCase[1]); + PersonNameFormatter.Usage formatterUsage = PersonNameFormatter.Usage.valueOf(testCase[2]); + PersonNameFormatter.Formality formatterFormality = PersonNameFormatter.Formality.valueOf(testCase[3]); + Set formatterOptions = makeOptionsSet(testCase[4]); + String expectedResult = testCase[5]; + + PersonNameFormatter formatter = new PersonNameFormatter(formatterLocale, formatterLength, formatterUsage, formatterFormality, formatterOptions); + String actualResult = 
formatter.format(name); + + if (forDebugging) { + System.out.println(" " + formatterLocale + "," + formatterLength + "," + formatterUsage + "," + formatterFormality + "," + formatterOptions + " => " + actualResult); + } else { + assertEquals("Wrong formatting result for " + nameAndTestCases.nameFields + "," + Arrays.toString(testCase), expectedResult, actualResult); + } + } + } + } + + private static Set makeOptionsSet(String optionsStr) { + Set result = new HashSet<>(); + StringTokenizer tok = new StringTokenizer(optionsStr, ","); + while (tok.hasMoreTokens()) { + String optionStr = tok.nextToken(); + PersonNameFormatter.Options option = PersonNameFormatter.Options.valueOf(optionStr); + result.add(option); + } + return result; + } + + @Test + public void TestEnglishName() { + executeTestCases(new NameAndTestCases[]{ + new NameAndTestCases("locale=en_US,prefix=Mr.,given=Richard,given-informal=Rich,given2=Theodore,surname=Gillam", new String[][] { + // test all the different combinations of parameters with the normal name order + { "en_US", "LONG", "REFERRING", "FORMAL", "", "Richard Theodore Gillam" }, + { "en_US", "LONG", "REFERRING", "INFORMAL", "", "Rich Gillam" }, + { "en_US", "LONG", "ADDRESSING", "FORMAL", "", "Mr. Gillam" }, + { "en_US", "LONG", "ADDRESSING", "INFORMAL", "", "Rich" }, + { "en_US", "MEDIUM", "REFERRING", "FORMAL", "", "Richard T. Gillam" }, + { "en_US", "MEDIUM", "REFERRING", "INFORMAL", "", "Rich Gillam" }, + { "en_US", "MEDIUM", "ADDRESSING", "FORMAL", "", "Mr. Gillam" }, + { "en_US", "MEDIUM", "ADDRESSING", "INFORMAL", "", "Rich" }, + { "en_US", "SHORT", "REFERRING", "FORMAL", "", "R. T. Gillam" }, + { "en_US", "SHORT", "REFERRING", "INFORMAL", "", "Rich G." }, + { "en_US", "SHORT", "ADDRESSING", "FORMAL", "", "Mr. 
Gillam" }, + { "en_US", "SHORT", "ADDRESSING", "INFORMAL", "", "Rich" }, + + // test all the different combinations of parameters for "sorting" order + { "en_US", "LONG", "REFERRING", "FORMAL", "SORTING", "Gillam, Richard Theodore" }, + { "en_US", "LONG", "REFERRING", "INFORMAL", "SORTING", "Gillam, Rich" }, + { "en_US", "MEDIUM", "REFERRING", "FORMAL", "SORTING", "Gillam, Richard T." }, + { "en_US", "MEDIUM", "REFERRING", "INFORMAL", "SORTING", "Gillam, Rich" }, + { "en_US", "SHORT", "REFERRING", "FORMAL", "SORTING", "Gillam, R. T." }, + { "en_US", "SHORT", "REFERRING", "INFORMAL", "SORTING", "Gillam, Rich" }, + + // we don't really support ADDRESSING in conjunction with SORTING-- it should always + // do the same thing as REFERRING + { "en_US", "LONG", "ADDRESSING", "FORMAL", "SORTING", "Gillam, Richard Theodore" }, + { "en_US", "LONG", "ADDRESSING", "INFORMAL", "SORTING", "Gillam, Rich" }, + { "en_US", "MEDIUM", "ADDRESSING", "FORMAL", "SORTING", "Gillam, Richard T." }, + { "en_US", "MEDIUM", "ADDRESSING", "INFORMAL", "SORTING", "Gillam, Rich" }, + { "en_US", "SHORT", "ADDRESSING", "FORMAL", "SORTING", "Gillam, R. T." 
}, + { "en_US", "SHORT", "ADDRESSING", "INFORMAL", "SORTING", "Gillam, Rich" }, + + // finally, try the different variations of MONOGRAM + { "en_US", "LONG", "MONOGRAM", "FORMAL", "", "RTG" }, + { "en_US", "LONG", "MONOGRAM", "INFORMAL", "", "RG" }, + { "en_US", "MEDIUM", "MONOGRAM", "FORMAL", "", "G" }, + { "en_US", "MEDIUM", "MONOGRAM", "INFORMAL", "", "R" }, + { "en_US", "SHORT", "MONOGRAM", "FORMAL", "", "G" }, + { "en_US", "SHORT", "MONOGRAM", "INFORMAL", "", "R" }, + + // and again, we don't support SORTING for monograms, so it should also do the + // same thing as GIVEN_FIRST + { "en_US", "LONG", "MONOGRAM", "FORMAL", "SORTING", "RTG" }, + { "en_US", "LONG", "MONOGRAM", "INFORMAL", "SORTING", "RG" }, + { "en_US", "MEDIUM", "MONOGRAM", "FORMAL", "SORTING", "G" }, + { "en_US", "MEDIUM", "MONOGRAM", "INFORMAL", "SORTING", "R" }, + { "en_US", "SHORT", "MONOGRAM", "FORMAL", "SORTING", "G" }, + { "en_US", "SHORT", "MONOGRAM", "INFORMAL", "SORTING", "R" }, + }) + }, false); + } + + @Test + public void TestPrefixCore() { + executeTestCases(new NameAndTestCases[]{ + new NameAndTestCases("locale=en_US,given=Willem,surname-prefix=van der,surname-core=Plas", new String[][] { + // for normal formatting, the {surname} field is just "{surname-prefix} {surname-core}" + { "en_US", "LONG", "REFERRING", "FORMAL", "", "Willem van der Plas" }, + { "en_US", "LONG", "REFERRING", "INFORMAL", "", "Willem van der Plas" }, + { "en_US", "MEDIUM", "REFERRING", "FORMAL", "", "Willem van der Plas" }, + { "en_US", "MEDIUM", "REFERRING", "INFORMAL", "", "Willem van der Plas" }, + { "en_US", "SHORT", "REFERRING", "FORMAL", "", "W. van der Plas" }, + + // for FORMAL SORTING, we sort by "surname-core", with "surname-prefix" at the end + { "en_US", "LONG", "REFERRING", "FORMAL", "SORTING", "Plas, Willem van der" }, + { "en_US", "MEDIUM", "REFERRING", "FORMAL", "SORTING", "Plas, Willem van der" }, + { "en_US", "SHORT", "REFERRING", "FORMAL", "SORTING", "Plas, W. 
van der" }, + + // but for INFORMAL SORTING, we keep the surname together and sort by the prefix + { "en_US", "LONG", "REFERRING", "INFORMAL", "SORTING", "van der Plas, Willem" }, + { "en_US", "MEDIUM", "REFERRING", "INFORMAL", "SORTING", "van der Plas, Willem" }, + { "en_US", "SHORT", "REFERRING", "INFORMAL", "SORTING", "van der Plas, Willem" }, + + // the default (English) logic for initials doesn't do anything special with the surname-prefix-- + // it gets initials too, which is probably wrong + { "en_US", "SHORT", "REFERRING", "INFORMAL", "", "Willem v. d. P." }, + + // and (English) monogram generation doesn't do anything special with the prefix either + { "en_US", "LONG", "MONOGRAM", "FORMAL", "", "WV" }, + { "en_US", "LONG", "MONOGRAM", "INFORMAL", "", "WV" }, + + // but Dutch monogram generation _does_ handle the prefix specially + { "nl_NL", "LONG", "MONOGRAM", "FORMAL", "", "WvP" }, + { "nl_NL", "LONG", "MONOGRAM", "INFORMAL", "", "WvP" }, + }), + new NameAndTestCases("locale=en_US,given=Willem,surname=van der Plas", new String[][] { + // if we just use the "surname" field instead of "surname-prefix" and "surname-core", everything's + // the same, except (obviously) for the cases where we were doing something special with the + // prefix and core + { "en_US", "LONG", "REFERRING", "FORMAL", "", "Willem van der Plas" }, + { "en_US", "SHORT", "REFERRING", "FORMAL", "", "W. van der Plas" }, + + // for example, SORTING works the same way regardless of formality + { "en_US", "LONG", "REFERRING", "FORMAL", "SORTING", "van der Plas, Willem" }, + { "en_US", "MEDIUM", "REFERRING", "FORMAL", "SORTING", "van der Plas, Willem" }, + { "en_US", "SHORT", "REFERRING", "FORMAL", "SORTING", "van der Plas, W." 
}, + { "en_US", "LONG", "REFERRING", "INFORMAL", "SORTING", "van der Plas, Willem" }, + { "en_US", "MEDIUM", "REFERRING", "INFORMAL", "SORTING", "van der Plas, Willem" }, + { "en_US", "SHORT", "REFERRING", "INFORMAL", "SORTING", "van der Plas, Willem" }, + + // and monogram generation works the same in English and Dutch + { "en_US", "LONG", "MONOGRAM", "FORMAL", "", "WV" }, + { "en_US", "LONG", "MONOGRAM", "INFORMAL", "", "WV" }, + { "nl_NL", "LONG", "MONOGRAM", "FORMAL", "", "WV" }, + { "nl_NL", "LONG", "MONOGRAM", "INFORMAL", "", "WV" }, + }), + new NameAndTestCases("locale=en_US,given=Willem,surname-prefix=van der,surname-core=Plas,surname-initial=vdP.,surname-monogram=vdP", new String[][] { + // we can work around the initial generation by providing a "surname-initial" field in the name object + { "en_US", "SHORT", "REFERRING", "INFORMAL", "", "Willem vdP." }, + + // we could also (theoretically) work around the monogram-generation problem in English in the same way + { "en_US", "LONG", "MONOGRAM", "FORMAL", "", "WVDP" }, + { "en_US", "LONG", "MONOGRAM", "INFORMAL", "", "WVDP" }, + }), + }, false); + } + + @Test + public void TestInitialGeneration() { + executeTestCases(new NameAndTestCases[]{ + new NameAndTestCases("locale=en_US,given=George,given2=Herbert Walker,surname=Bush", new String[][] { + { "en_US", "LONG", "REFERRING", "FORMAL", "", "George Herbert Walker Bush" }, + { "en_US", "MEDIUM", "REFERRING", "FORMAL", "", "George H. W. Bush" }, + { "en_US", "SHORT", "REFERRING", "FORMAL", "", "G. H. W. Bush" }, + { "en_US", "SHORT", "REFERRING", "INFORMAL", "", "George B." 
}, + { "en_US", "LONG", "MONOGRAM", "FORMAL", "", "GHB" }, + { "en_US", "LONG", "MONOGRAM", "INFORMAL", "", "GB" }, + }), + new NameAndTestCases("locale=en_US,given=Ralph,surname=Vaughan Williams", new String[][] { + { "en_US", "LONG", "REFERRING", "FORMAL", "", "Ralph Vaughan Williams" }, + { "en_US", "MEDIUM", "REFERRING", "FORMAL", "", "Ralph Vaughan Williams" }, + { "en_US", "SHORT", "REFERRING", "FORMAL", "", "R. Vaughan Williams" }, + { "en_US", "SHORT", "REFERRING", "INFORMAL", "", "Ralph V. W." }, + { "en_US", "LONG", "MONOGRAM", "FORMAL", "", "RV" }, + { "en_US", "LONG", "MONOGRAM", "INFORMAL", "", "RV" }, + }), + new NameAndTestCases("locale=en_US,given=John Paul,given2=Stephen David George,surname=Smith", new String[][] { + { "en_US", "LONG", "REFERRING", "FORMAL", "", "John Paul Stephen David George Smith" }, + { "en_US", "MEDIUM", "REFERRING", "FORMAL", "", "John Paul S. D. G. Smith" }, + { "en_US", "SHORT", "REFERRING", "FORMAL", "", "J. P. S. D. G. Smith" }, + { "en_US", "SHORT", "REFERRING", "INFORMAL", "", "John Paul S." }, + { "en_US", "LONG", "MONOGRAM", "FORMAL", "", "JSS" }, + { "en_US", "LONG", "MONOGRAM", "INFORMAL", "", "JS" }, + }), + }, false); + } + + @Test + public void TestLiteralTextElision() { + executeTestCases(new NameAndTestCases[]{ + // literal text elision is difficult to test with the real locale data, although this is a start + // perhaps we could add an API for debugging that lets us pass in real pattern strings, but I'd like to stay away from that + new NameAndTestCases("locale=en_US,given=John,given2=Paul,surname=Smith,suffix=Jr.", new String[][] { + { "en_US", "LONG", "REFERRING", "FORMAL", "", "John Paul Smith Jr." 
}, + }), + new NameAndTestCases("locale=en_US,given=John,given2=Paul,surname=Smith", new String[][] { + { "en_US", "LONG", "REFERRING", "FORMAL", "", "John Paul Smith" }, + }), + new NameAndTestCases("locale=en_US,given2=Paul,surname=Smith", new String[][] { + { "en_US", "LONG", "REFERRING", "FORMAL", "", "Paul Smith" }, + }), + new NameAndTestCases("locale=en_US,given2=Paul", new String[][] { + { "en_US", "LONG", "REFERRING", "FORMAL", "", "Paul" }, + }), + new NameAndTestCases("locale=en_US,given=John", new String[][] { + { "en_US", "LONG", "REFERRING", "FORMAL", "", "John" }, + }), + new NameAndTestCases("locale=en_US,given=John,suffix=Jr.", new String[][] { + { "en_US", "LONG", "REFERRING", "FORMAL", "", "John Jr." }, + }), + }, false); + } + + @Test + public void TestMultiplePatterns() { + executeTestCases(new NameAndTestCases[]{ + // the Spanish rules have two name patterns for many of the sorting cases: one to use if the surname2 + // field is populated and one to use if not-- these allow the comma between the fields to be displayed + // in the right place. This test checks to make sure we're using the right pattern based on which + // fields are present in the actual name + new NameAndTestCases("locale=es_ES,given=Andrés,given2=Manuel,surname=López,surname2=Obrador", new String[][] { + { "es_ES", "LONG", "REFERRING", "FORMAL", "", "Andrés Manuel López Obrador" }, + { "es_ES", "LONG", "REFERRING", "FORMAL", "SORTING" , "López Obrador, Andrés Manuel" }, + }), + new NameAndTestCases("locale=es_ES,given=Andrés,given2=Manuel,surname=López", new String[][] { + { "es_ES", "LONG", "REFERRING", "FORMAL", "", "Andrés Manuel López" }, + { "es_ES", "LONG", "REFERRING", "FORMAL", "SORTING" , "López, Andrés Manuel" }, + }), + }, false); + } + + @Test + public void TestNameOrder() { + executeTestCases(new NameAndTestCases[]{ + // the name's locale is used to determine the field order. For the English name formatter, if the + // name is English, the order is GN first. 
If it's Japanese, it's SN first. This is true whether the + // Japanese name is written in Latin letters or Han characters + new NameAndTestCases("locale=en_US,given=Shinzo,surname=Abe", new String[][] { + { "en_US", "LONG", "REFERRING", "FORMAL", "", "Shinzo Abe" }, + }), + new NameAndTestCases("locale=ja_JP,given=Shinzo,surname=Abe", new String[][] { + { "en_US", "LONG", "REFERRING", "FORMAL", "", "Abe Shinzo" }, + }), + new NameAndTestCases("locale=ja_JP,given=晋三,surname=安倍", new String[][] { + { "en_US", "LONG", "REFERRING", "FORMAL", "", "安倍 晋三" }, + }), + + // the name can also declare its order directly, with the optional "preferredOrder" field. If it does this, + // the value of that field holds for all formatter locales and overrides determining the order + // by looking at the name's locale + new NameAndTestCases("locale=en_US,given=Shinzo,surname=Abe,preferredOrder=surnameFirst", new String[][] { + { "en_US", "LONG", "REFERRING", "FORMAL", "", "Abe Shinzo" }, + }), + new NameAndTestCases("locale=ja_JP,given=Shinzo,surname=Abe,preferredOrder=givenFirst", new String[][] { + { "en_US", "LONG", "REFERRING", "FORMAL", "", "Shinzo Abe" }, + }), + }, false); + } + + @Test + public void TestCapitalizedSurname() { + executeTestCases(new NameAndTestCases[]{ + // the SURNAME_ALLCAPS option does just what it says: it causes the surname field + // to be displayed in all caps + new NameAndTestCases("locale=en_US,given=Shinzo,surname=Abe", new String[][] { + { "en_US", "LONG", "REFERRING", "FORMAL", "", "Shinzo Abe" }, + { "en_US", "LONG", "REFERRING", "FORMAL", "SURNAME_ALLCAPS", "Shinzo ABE" }, + }), + new NameAndTestCases("locale=ja_JP,given=Shinzo,surname=Abe", new String[][] { + { "en_US", "LONG", "REFERRING", "FORMAL", "", "Abe Shinzo" }, + { "en_US", "LONG", "REFERRING", "FORMAL", "SURNAME_ALLCAPS", "ABE Shinzo" }, + }), + }, false); + } + + @Test + public void TestNameSpacing() { + executeTestCases(new NameAndTestCases[]{ + // if the formatter locale uses 
spaces, the result will use its formats (complete with spaces), + // regardless of locale + new NameAndTestCases("locale=ja_JP,given=Hayao,surname=Miyazaki", new String[][] { + { "en_US", "LONG", "REFERRING", "FORMAL", "", "Miyazaki Hayao" }, + }), + new NameAndTestCases("locale=ja_JP,given=駿,surname=宮崎", new String[][] { + { "en_US", "LONG", "REFERRING", "FORMAL", "", "宮崎 駿" }, + }), + + // if the formatter locale doesn't use spaces and the name's locale doesn't either, just use + // the native formatter + new NameAndTestCases("locale=ja_JP,given=駿,surname=宮崎", new String[][] { + // (the Japanese name formatter actually inserts a space even for native names) + { "ja_JP", "LONG", "REFERRING", "FORMAL", "", "宮崎 駿" }, + { "zh_CN", "LONG", "REFERRING", "FORMAL", "", "宮崎駿" }, + }), + + // if the formatter locale doesn't use spaces and the name's locale does, use the name locale's formatter, + // but if the name is still using the formatter locale's script, use the native formatter's + // "foreign space replacement" character instead of spaces + new NameAndTestCases("locale=en_US,given=Albert,surname=Einstein", new String[][] { + { "ja_JP", "LONG", "REFERRING", "FORMAL", "", "Albert Einstein" }, + { "zh_CN", "LONG", "REFERRING", "FORMAL", "", "Albert Einstein" }, + }), + new NameAndTestCases("locale=en_US,given=アルベルト,surname=アインシュタイン", new String[][] { + { "ja_JP", "LONG", "REFERRING", "FORMAL", "", "アルベルト・アインシュタイン" }, + }), + new NameAndTestCases("locale=en_US,given=阿尔伯特,surname=爱因斯坦", new String[][] { + { "zh_CN", "LONG", "REFERRING", "FORMAL", "", "阿尔伯特·爱因斯坦" }, + }), + }, false); + } + + // need tests (and implementation?) for: + // - foreign space replacement +}