ICU-22081 PersonNameFormatter tech preview

This commit is contained in:
Rich Gillam 2022-08-12 16:07:52 -07:00
parent 9acba58c49
commit 53775accd5
6 changed files with 1550 additions and 0 deletions

View file

@ -0,0 +1,156 @@
// © 2022 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package com.ibm.icu.impl.personname;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.CaseMap;
import com.ibm.icu.text.PersonNameFormatter;
import com.ibm.icu.text.SimpleFormatter;
import com.ibm.icu.util.ULocale;
import java.util.StringTokenizer;
/**
 * Parent class for classes that implement field-modifier behavior.  A field modifier takes the text of a
 * single name field and returns that text with one transformation applied.  The concrete modifiers in this
 * file are the formatter's default implementations; callers obtain them through forName().
 */
abstract class FieldModifierImpl {
    /**
     * Applies this modifier to a field value.
     * @param fieldValue The text of the field to modify.
     * @return The modified text.
     */
    public abstract String modifyField(String fieldValue);

    /**
     * Returns the default implementation of the specified modifier.
     * @param modifierID The modifier whose implementation is wanted.
     * @param formatterImpl The formatter that will use the modifier.  It supplies the locale used by the
     *                      capitalization modifiers and the patterns used by the "initial" modifier.
     * @return The FieldModifierImpl implementing modifierID.
     * @throws IllegalArgumentException if modifierID isn't one of the modifiers handled here.
     */
    public static FieldModifierImpl forName(PersonNameFormatter.FieldModifier modifierID, PersonNameFormatterImpl formatterImpl) {
        switch (modifierID) {
            case INFORMAL:
            case CORE:
                // by default, the informal and core variants of a field are the field itself
                return NOOP_MODIFIER;
            case PREFIX:
                // by default, a field has no prefix
                return NULL_MODIFIER;
            case ALL_CAPS:
                return new AllCapsModifier(formatterImpl.getLocale());
            case INITIAL_CAP:
                return new InitialCapModifier(formatterImpl.getLocale());
            case INITIAL:
                return new InitialModifier(formatterImpl.getInitialPattern(), formatterImpl.getInitialSequencePattern());
            case MONOGRAM:
                return MONOGRAM_MODIFIER;
            default:
                throw new IllegalArgumentException("Invalid modifier ID " + modifierID);
        }
    }

    /**
     * A field modifier that just returns the field value unmodified.  This is used to implement the default
     * behavior of the "informal" and "core" modifiers ("real" informal or core variants have to be supplied or
     * calculated by the PersonName object).
     */
    private static final FieldModifierImpl NOOP_MODIFIER = new FieldModifierImpl() {
        @Override
        public String modifyField(String fieldValue) {
            return fieldValue;
        }
    };

    /**
     * A field modifier that just returns the empty string.  This is used to implement the default behavior of the
     * "prefix" modifier ("real" prefix variants have to be supplied or calculated by the PersonName object).
     */
    private static final FieldModifierImpl NULL_MODIFIER = new FieldModifierImpl() {
        @Override
        public String modifyField(String fieldValue) {
            return "";
        }
    };

    /**
     * A field modifier that returns the field value converted to ALL CAPS, using the case-mapping rules
     * of the formatter's locale.  This is the default behavior for the "allCaps" modifier.
     */
    private static class AllCapsModifier extends FieldModifierImpl {
        private final ULocale locale;

        public AllCapsModifier(ULocale locale) {
            this.locale = locale;
        }

        @Override
        public String modifyField(String fieldValue) {
            return UCharacter.toUpperCase(locale, fieldValue);
        }
    }

    /**
     * A field modifier that titlecases the field value without lowercasing any of its other characters.
     * This is the default behavior of the "initialCap" modifier.
     * NOTE(review): the case map is built with wholeString() and applied with no break iterator, so it
     * presumably capitalizes only the first letter of the whole field rather than the first letter of
     * each word -- confirm against the CaseMap.Title documentation and the intended behavior.
     */
    private static class InitialCapModifier extends FieldModifierImpl {
        private final ULocale locale;
        private static final CaseMap.Title TO_TITLE_WHOLE_STRING_NO_LOWERCASE = CaseMap.toTitle().wholeString().noLowercase();

        public InitialCapModifier(ULocale locale) {
            this.locale = locale;
        }

        @Override
        public String modifyField(String fieldValue) {
            return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(locale.toLocale(), null, fieldValue);
        }
    }

    /**
     * A field modifier that returns the field value converted into one or more initials.  This is the first grapheme
     * cluster of each space-separated word in the field value, modified using the initialPattern/initial resource
     * value from the locale data, and strung together using the initialPattern/initialSequence resource value from
     * the locale data.  (In English, these patterns put periods after each initial and connect them with spaces.)
     * This is the default behavior of the "initial" modifier.
     */
    private static class InitialModifier extends FieldModifierImpl {
        private final SimpleFormatter initialFormatter;
        private final SimpleFormatter initialSequenceFormatter;

        public InitialModifier(String initialPattern, String initialSequencePattern) {
            this.initialFormatter = SimpleFormatter.compile(initialPattern);
            this.initialSequenceFormatter = SimpleFormatter.compile(initialSequencePattern);
        }

        /**
         * @param fieldValue The field text; words are separated by spaces.
         * @return The formatted initials, or the empty string if fieldValue contains no words.
         */
        @Override
        public String modifyField(String fieldValue) {
            String result = null;
            StringTokenizer tok = new StringTokenizer(fieldValue, " ");
            while (tok.hasMoreTokens()) {
                String curInitial = initialFormatter.format(getFirstGrapheme(tok.nextToken()));
                if (result == null) {
                    result = curInitial;
                } else {
                    result = initialSequenceFormatter.format(result, curInitial);
                }
            }
            // A field that's empty (or nothing but spaces) has no initials.  Return "" rather than null
            // so that the result can safely be passed on to another modifier, like every other modifier's
            // result can.
            return (result == null) ? "" : result;
        }
    }

    /**
     * A field modifier that simply returns the first grapheme cluster in the field value.
     * This is the default implementation of the "monogram" modifier.
     */
    private static final FieldModifierImpl MONOGRAM_MODIFIER = new FieldModifierImpl() {
        @Override
        public String modifyField(String fieldValue) {
            return getFirstGrapheme(fieldValue);
        }
    };

    /**
     * A utility function that just returns the first grapheme cluster in the string.
     * @param s The string to examine.
     * @return The first grapheme cluster of s, or the empty string if s is empty.
     */
    private static String getFirstGrapheme(String s) {
        // early out if the string is empty to avoid StringIndexOutOfBoundsException
        if (s.isEmpty()) {
            return "";
        }

        // (currently, no locale overrides the grapheme-break rules, so we just use "root" instead of passing in the locale)
        BreakIterator bi = BreakIterator.getCharacterInstance(ULocale.ROOT);
        bi.setText(s);
        // the iterator starts at offset 0, so next() is the end of the first grapheme cluster
        return s.substring(0, bi.next());
    }
}

View file

@ -0,0 +1,251 @@
// © 2022 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package com.ibm.icu.impl.personname;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.FormattedValue;
import com.ibm.icu.text.PersonNameFormatter;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.UResourceBundle;
import java.util.*;
import static com.ibm.icu.util.UResourceBundle.ARRAY;
import static com.ibm.icu.util.UResourceBundle.STRING;
/**
 * Actual implementation class for PersonNameFormatter.  Loads the name-formatting patterns and related
 * settings for one locale and one combination of length/usage/formality/options, and uses them to
 * turn PersonName objects into strings.
 */
public class PersonNameFormatterImpl {
    /**
     * Language codes of the locales that don't put spaces between words.  Shared by all instances
     * and never modified.
     */
    private static final Set<String> LOCALES_THAT_DONT_USE_SPACES =
            Collections.unmodifiableSet(new HashSet<>(Arrays.asList("ja", "zh", "th", "yue")));

    private final ULocale locale;
    private final PersonNamePattern[] gnFirstPatterns;
    private final PersonNamePattern[] snFirstPatterns;
    private final Set<String> gnFirstLocales;
    private final Set<String> snFirstLocales;
    private final String initialPattern;
    private final String initialSequencePattern;
    private final boolean capitalizeSurname;
    private final String foreignSpaceReplacement;
    private final boolean formatterLocaleUsesSpaces;
    private final PersonNameFormatter.Length length;
    private final PersonNameFormatter.Usage usage;
    private final PersonNameFormatter.Formality formality;
    private final Set<PersonNameFormatter.Options> options;

    /**
     * Creates a formatter implementation.
     * @param locale The formatter's locale; the patterns are loaded from this locale's resource bundle.
     * @param length The requested length.
     * @param usage The requested usage.
     * @param formality The requested formality.
     * @param options Additional formatting options.  May be null, which is the same as the empty set.
     *                The set is copied; the caller's set is never modified.
     */
    public PersonNameFormatterImpl(ULocale locale,
                                   PersonNameFormatter.Length length,
                                   PersonNameFormatter.Usage usage,
                                   PersonNameFormatter.Formality formality,
                                   Set<PersonNameFormatter.Options> options) {
        // null for `options` is the same as the empty set.  Either way, work on our own copy: the
        // adjustment logic below removes elements, and that must neither alter the caller's set
        // nor fail when the caller's set is unmodifiable.
        Set<PersonNameFormatter.Options> ownOptions = new HashSet<>();
        if (options != null) {
            ownOptions.addAll(options);
        }
        options = ownOptions;

        // save off our creation parameters (these are only used if we have to create a second formatter)
        this.length = length;
        this.usage = usage;
        this.formality = formality;
        this.options = options;

        // load simple property values from the resource bundle (or the options set)
        ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME, locale);
        this.locale = locale;
        this.initialPattern = rb.getStringWithFallback("personNames/initialPattern/initial");
        this.initialSequencePattern = rb.getStringWithFallback("personNames/initialPattern/initialSequence");
        this.capitalizeSurname = options.contains(PersonNameFormatter.Options.SURNAME_ALLCAPS);
        this.foreignSpaceReplacement = rb.getStringWithFallback("personNames/foreignSpaceReplacement");
        this.formatterLocaleUsesSpaces = !LOCALES_THAT_DONT_USE_SPACES.contains(locale.getLanguage());

        // adjust for combinations of parameters that don't make sense in practice
        if (usage == PersonNameFormatter.Usage.MONOGRAM) {
            // we don't support SORTING in conjunction with MONOGRAM; if the caller passes in SORTING, remove it from
            // the options list
            options.remove(PersonNameFormatter.Options.SORTING);
        } else if (options.contains(PersonNameFormatter.Options.SORTING)) {
            // we only support SORTING in conjunction with REFERRING; if the caller passes in ADDRESSING, treat it
            // the same as REFERRING
            usage = PersonNameFormatter.Usage.REFERRING;
        }

        // load the actual formatting patterns-- since we don't know the name order until formatting time (it can be
        // different for different names), load patterns for both GN-first and SN-first names.  (If the user has
        // specified SORTING, we don't need to do this-- we just load the "sorting" patterns and ignore the name's order.)
        final String RESOURCE_PATH_PREFIX = "personNames/namePattern/";
        // the enum names are turned into resource keys, so lowercase them with locale-independent rules
        // (under a Turkish default locale, a plain toLowerCase() would turn "I" into a dotless "ı")
        String resourceNameBody = length.toString().toLowerCase(Locale.ROOT) + "-"
                + usage.toString().toLowerCase(Locale.ROOT) + "-"
                + formality.toString().toLowerCase(Locale.ROOT);
        if (!options.contains(PersonNameFormatter.Options.SORTING)) {
            ICUResourceBundle gnFirstResource = rb.getWithFallback(RESOURCE_PATH_PREFIX + "givenFirst-" + resourceNameBody);
            ICUResourceBundle snFirstResource = rb.getWithFallback(RESOURCE_PATH_PREFIX + "surnameFirst-" + resourceNameBody);

            gnFirstPatterns = PersonNamePattern.makePatterns(asStringArray(gnFirstResource), this);
            snFirstPatterns = PersonNamePattern.makePatterns(asStringArray(snFirstResource), this);

            gnFirstLocales = new HashSet<>();
            Collections.addAll(gnFirstLocales, asStringArray(rb.getWithFallback("personNames/nameOrderLocales/givenFirst")));
            snFirstLocales = new HashSet<>();
            Collections.addAll(snFirstLocales, asStringArray(rb.getWithFallback("personNames/nameOrderLocales/surnameFirst")));
        } else {
            // when sorting, there's only one set of patterns; it lives in gnFirstPatterns, and a null
            // snFirstPatterns tells format() not to bother working out the name's field order
            ICUResourceBundle patternResource = rb.getWithFallback(RESOURCE_PATH_PREFIX + "sorting-" + resourceNameBody);

            gnFirstPatterns = PersonNamePattern.makePatterns(asStringArray(patternResource), this);
            snFirstPatterns = null;
            gnFirstLocales = null;
            snFirstLocales = null;
        }
    }

    /**
     * Formats a name according to this formatter's locale and parameters.
     * @param name The name to format.  Its getNameLocale() result is used without a null check.
     * @return The formatted name.
     */
    public String format(PersonNameFormatter.PersonName name) {
        // TODO: Should probably return a FormattedPersonName object

        // if the formatter is for a language that doesn't use spaces between words and the name is from a language
        // that does, create a formatter for the NAME'S locale and use THAT to format the name
        ULocale nameLocale = name.getNameLocale();
        boolean nameLocaleUsesSpaces = !LOCALES_THAT_DONT_USE_SPACES.contains(nameLocale.getLanguage());
        if (!formatterLocaleUsesSpaces && nameLocaleUsesSpaces) {
            PersonNameFormatterImpl nativeFormatter = new PersonNameFormatterImpl(nameLocale, this.length,
                    this.usage, this.formality, this.options);
            String result = nativeFormatter.format(name);

            // BUT, if the name is actually written in the formatter locale's script, replace any spaces in the name
            // with the foreignSpaceReplacement character
            if (!foreignSpaceReplacement.equals(" ") && scriptMatchesLocale(result, this.locale)) {
                result = result.replace(" ", this.foreignSpaceReplacement);
            }
            return result;
        }

        // if we get down to here, we're just doing normal formatting-- if we have both GN-first and SN-first rules,
        // choose which one to use based on the name's locale and preferred field order
        if (snFirstPatterns == null || nameIsGnFirst(name)) {
            return getBestPattern(gnFirstPatterns, name).format(name);
        } else {
            return getBestPattern(snFirstPatterns, name).format(name);
        }
    }

    /** Returns the locale this formatter was created for. */
    public ULocale getLocale() {
        return locale;
    }

    /** Returns the locale's initialPattern/initial resource value (the pattern applied to each initial). */
    public String getInitialPattern() {
        return initialPattern;
    }

    /** Returns the locale's initialPattern/initialSequence resource value (the pattern that joins initials). */
    public String getInitialSequencePattern() {
        return initialSequencePattern;
    }

    /** Returns true if the formatter was created with the SURNAME_ALLCAPS option. */
    public boolean shouldCapitalizeSurname() {
        return capitalizeSurname;
    }

    /**
     * Returns the value of the resource, as a string array.
     * @param resource An ICUResourceBundle of type STRING or ARRAY.  If ARRAY, this function just returns it
     *                 as a string array.  If STRING, it returns a one-element array containing that string.
     * @return The resource's value, as an array of Strings.
     * @throws IllegalStateException if the resource is of any other type.
     */
    private static String[] asStringArray(ICUResourceBundle resource) {
        if (resource.getType() == STRING) {
            return new String[] { resource.getString() };
        } else if (resource.getType() == ARRAY) {
            return resource.getStringArray();
        } else {
            throw new IllegalStateException("Unsupported resource type " + resource.getType());
        }
    }

    /**
     * Returns the field order to use when formatting this name, taking into account the name's preferredOrder
     * field, as well as the name and formatter's respective locales.  Only called when the formatter has
     * both GN-first and SN-first patterns (i.e., when it wasn't created with the SORTING option).
     * @param name The name to be formatted.
     * @return If true, use GN-first order to format the name; if false, use SN-first order.
     * @throws IllegalArgumentException if the name supplies a preferredOrder value other than
     *         "givenFirst" or "surnameFirst".
     */
    private boolean nameIsGnFirst(PersonNameFormatter.PersonName name) {
        // the name can declare its order-- check that first (it overrides any locale-based calculation)
        Set<PersonNameFormatter.FieldModifier> modifiers = new HashSet<>();
        String preferredOrder = name.getFieldValue(PersonNameFormatter.NameField.PREFERRED_ORDER, modifiers);
        if (preferredOrder != null) {
            if (preferredOrder.equals("givenFirst")) {
                return true;
            } else if (preferredOrder.equals("surnameFirst")) {
                return false;
            } else {
                throw new IllegalArgumentException("Illegal preferredOrder value " + preferredOrder);
            }
        }

        // otherwise, look the name's locale up in the two locale lists, dropping one trailing
        // "_xxx" subtag at a time and finally falling back to "root" (which is looked up too)
        String localeStr = name.getNameLocale().toString();
        while (true) {
            if (gnFirstLocales.contains(localeStr)) {
                return true;
            } else if (snFirstLocales.contains(localeStr)) {
                return false;
            }

            if (localeStr.equals("root")) {
                break;
            }
            int lastUnderbarPos = localeStr.lastIndexOf("_");
            if (lastUnderbarPos >= 0) {
                localeStr = localeStr.substring(0, lastUnderbarPos);
            } else {
                localeStr = "root";
            }
        }

        // should never get here-- "root" should always be in one of the locales
        return true;
    }

    /**
     * Chooses which of a set of alternative patterns to use for a particular name.
     * @param patterns The candidate patterns.
     * @param name The name to be formatted.
     * @return The pattern that best fits the fields present in name (null if patterns is empty).
     */
    private PersonNamePattern getBestPattern(PersonNamePattern[] patterns, PersonNameFormatter.PersonName name) {
        // early out if there's only one pattern
        if (patterns.length == 1) {
            return patterns[0];
        } else {
            // if there's more than one pattern, return the one that contains the greatest number of fields that
            // actually have values in `name`.  If there's a tie, return the pattern that contains the lowest number
            // of fields that DON'T have values in `name`.
            int maxPopulatedFields = 0;
            int minEmptyFields = Integer.MAX_VALUE;
            PersonNamePattern bestPattern = null;

            for (PersonNamePattern pattern : patterns) {
                int populatedFields = pattern.numPopulatedFields(name);
                int emptyFields = pattern.numEmptyFields(name);
                if (populatedFields > maxPopulatedFields) {
                    maxPopulatedFields = populatedFields;
                    minEmptyFields = emptyFields;
                    bestPattern = pattern;
                } else if (populatedFields == maxPopulatedFields && emptyFields < minEmptyFields) {
                    minEmptyFields = emptyFields;
                    bestPattern = pattern;
                }
            }
            return bestPattern;
        }
    }

    /**
     * Returns true if the script of `s` is one of the default scripts for `locale`.
     * This function only checks the script of the first character whose script isn't "common,"
     * so it probably won't work right on mixed-script strings.
     * @param s The string to check.
     * @param locale The locale whose scripts `s` is compared against.
     * @return True if the script found in `s` is among the locale's scripts; false otherwise
     *         (including when no scripts can be determined for the locale).
     */
    private static boolean scriptMatchesLocale(String s, ULocale locale) {
        int[] localeScripts = UScript.getCode(locale);
        if (localeScripts == null) {
            return false;
        }

        // walk the string by code point, not by char, so that a supplementary-plane letter is
        // looked up as itself rather than as a lone surrogate
        int stringScript = UScript.COMMON;
        for (int i = 0; stringScript == UScript.COMMON && i < s.length(); ) {
            int c = s.codePointAt(i);
            stringScript = UScript.getScript(c);
            i += Character.charCount(c);
        }

        for (int localeScript : localeScripts) {
            if (localeScript == stringScript) {
                return true;
            }
        }
        return false;
    }
}

View file

@ -0,0 +1,269 @@
// © 2022 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package com.ibm.icu.impl.personname;
import com.ibm.icu.text.PersonNameFormatter;
import java.util.*;
/**
 * A single name formatting pattern, corresponding to a single namePattern element in CLDR.
 * A pattern is a sequence of literal text and name fields; a field is written in braces as a field
 * name optionally followed by "-"-separated modifier names.  A backslash makes the character after
 * it literal.
 */
class PersonNamePattern {
    private final String patternText; // for debugging
    private final Element[] patternElements;

    /**
     * Parses an array of pattern strings.
     * @param patternText The pattern strings.
     * @param formatterImpl The formatter the patterns belong to (used to look up modifier implementations).
     * @return One PersonNamePattern per element of patternText, in the same order.
     */
    public static PersonNamePattern[] makePatterns(String[] patternText, PersonNameFormatterImpl formatterImpl) {
        PersonNamePattern[] result = new PersonNamePattern[patternText.length];
        for (int i = 0; i < patternText.length; i++) {
            result[i] = new PersonNamePattern(patternText[i], formatterImpl);
        }
        return result;
    }

    /**
     * Parses a single pattern string.
     * @throws IllegalArgumentException if the braces in the pattern are nested, unbalanced, or empty,
     *         or if a field or modifier name is invalid.
     */
    private PersonNamePattern(String patternText, PersonNameFormatterImpl formatterImpl) {
        this.patternText = patternText;

        List<Element> elements = new ArrayList<>();
        boolean inField = false;
        boolean inEscape = false;
        // accumulates either the current run of literal text or the contents of the current field
        StringBuilder workingString = new StringBuilder();
        for (int i = 0; i < patternText.length(); i++) {
            char c = patternText.charAt(i);

            if (inEscape) {
                workingString.append(c);
                inEscape = false;
            } else {
                switch (c) {
                    case '\\':
                        inEscape = true;
                        break;
                    case '{':
                        if (!inField) {
                            if (workingString.length() > 0) {
                                elements.add(new LiteralText(workingString.toString()));
                                workingString = new StringBuilder();
                            }
                            inField = true;
                        } else {
                            throw new IllegalArgumentException("Nested braces are not allowed in name patterns");
                        }
                        break;
                    case '}':
                        if (inField) {
                            if (workingString.length() > 0) {
                                elements.add(new NameFieldImpl(workingString.toString(), formatterImpl));
                                workingString = new StringBuilder();
                            } else {
                                throw new IllegalArgumentException("No field name inside braces");
                            }
                            inField = false;
                        } else {
                            throw new IllegalArgumentException("Unmatched closing brace in literal text");
                        }
                        break;
                    default:
                        workingString.append(c);
                }
            }
        }
        // a field that's still open at the end of the pattern is malformed; report it like the other
        // brace errors instead of quietly treating the field name as literal text
        if (inField) {
            throw new IllegalArgumentException("Unmatched opening brace in name pattern");
        }
        if (workingString.length() > 0) {
            elements.add(new LiteralText(workingString.toString()));
        }
        this.patternElements = elements.toArray(new Element[0]);
    }

    /**
     * Formats a name using this pattern, leaving out the fields the name doesn't have a value for,
     * along with the literal text that only made sense next to them.
     * @param name The name to format.
     * @return The formatted name.
     */
    public String format(PersonNameFormatter.PersonName name) {
        StringBuilder result = new StringBuilder();
        boolean seenLeadingField = false;       // have we output a populated field yet?
        boolean seenEmptyLeadingField = false;  // are we in a run of empty fields at the start of the pattern?
        boolean seenEmptyField = false;         // are we in a run of empty fields after a populated field?
        StringBuilder textBefore = new StringBuilder(); // pending literal text before any empty field
        StringBuilder textAfter = new StringBuilder();  // pending literal text after the latest empty field

        // the logic below attempts to implement the following algorithm:
        // - If one or more fields at the beginning of the name are empty, also skip all literal text
        //   from the beginning of the name up to the first populated field.
        // - If one or more fields at the end of the name are empty, also skip all literal text from
        //   the last populated field to the end of the name.
        // - If one or more contiguous fields in the middle of the name are empty, skip the literal text
        //   between them, omit characters from the literal text on either side of the empty fields up to
        //   the first space on either side, and make sure that the resulting literal text doesn't end up
        //   with two spaces in a row.
        for (Element element : patternElements) {
            if (element.isLiteral()) {
                if (seenEmptyLeadingField) {
                    // do nothing; throw away the literal text
                } else if (seenEmptyField) {
                    textAfter.append(element.format(name));
                } else {
                    textBefore.append(element.format(name));
                }
            } else {
                String fieldText = element.format(name);
                if (fieldText == null || fieldText.isEmpty()) {
                    if (!seenLeadingField) {
                        seenEmptyLeadingField = true;
                        textBefore.setLength(0);
                    } else {
                        seenEmptyField = true;
                        textAfter.setLength(0);
                    }
                } else {
                    seenLeadingField = true;
                    seenEmptyLeadingField = false;
                    if (seenEmptyField) {
                        result.append(coalesce(textBefore, textAfter));
                        result.append(fieldText);
                        seenEmptyField = false;
                    } else {
                        result.append(textBefore);
                        textBefore.setLength(0);
                        // (reuse the value we already fetched instead of formatting the field again)
                        result.append(fieldText);
                    }
                }
            }
        }
        // literal text at the end of the pattern is only kept if it doesn't follow an empty field
        if (!seenEmptyField) {
            result.append(textBefore);
        }
        return result.toString();
    }

    /**
     * Returns the number of fields in this pattern that have a value in `name`.
     */
    public int numPopulatedFields(PersonNameFormatter.PersonName name) {
        int result = 0;
        for (Element element : patternElements) {
            result += element.isPopulated(name) ? 1 : 0;
        }
        return result;
    }

    /**
     * Returns the number of fields in this pattern that DON'T have a value in `name`.
     * Literal text isn't a field, so it isn't counted.
     */
    public int numEmptyFields(PersonNameFormatter.PersonName name) {
        int result = 0;
        for (Element element : patternElements) {
            // isPopulated() is always false for literal text, so literals have to be excluded explicitly
            if (!element.isLiteral() && !element.isPopulated(name)) {
                ++result;
            }
        }
        return result;
    }

    /**
     * Stitches together the literal text on either side of an omitted field.  The result is the
     * run of non-whitespace characters at the beginning of s1, followed by a single whitespace
     * character (the one that ends that run in s1 or, if s1 has no whitespace, the last one in s2),
     * followed by the run of non-whitespace characters at the end of s2.  Both s1 and s2 are
     * emptied as a side effect.
     * @param s1 The literal text before the omitted field.
     * @param s2 The literal text after the omitted field.
     * @return The coalesced literal text.
     */
    private String coalesce(StringBuilder s1, StringBuilder s2) {
        // get the range of non-whitespace characters at the beginning of s1
        int p1 = 0;
        while (p1 < s1.length() && !Character.isWhitespace(s1.charAt(p1))) {
            ++p1;
        }

        // get the range of non-whitespace characters at the end of s2
        int p2 = s2.length() - 1;
        while (p2 >= 0 && !Character.isWhitespace(s2.charAt(p2))) {
            --p2;
        }

        // also include one whitespace character from s1 or, if there aren't
        // any, one whitespace character from s2
        if (p1 < s1.length()) {
            ++p1;
        } else if (p2 >= 0) {
            --p2;
        }

        // concatenate those two ranges to get the coalesced literal text
        String result = s1.substring(0, p1) + s2.substring(p2 + 1);

        // clear out s1 and s2 (done here to improve readability in format() above))
        s1.setLength(0);
        s2.setLength(0);

        return result;
    }

    /**
     * A single element in a NamePattern.  This is either a name field or a range of literal text.
     */
    private interface Element {
        /** Returns true if this element is literal text, false if it's a name field. */
        boolean isLiteral();
        /** Returns this element's text for the specified name (may be null for a field the name lacks). */
        String format(PersonNameFormatter.PersonName name);
        /** Returns true if this element is a field that has a non-empty value in the specified name. */
        boolean isPopulated(PersonNameFormatter.PersonName name);
    }

    /**
     * Literal text from a name pattern.
     */
    private static class LiteralText implements Element {
        private final String text;

        public LiteralText(String text) {
            this.text = text;
        }

        @Override
        public boolean isLiteral() {
            return true;
        }

        @Override
        public String format(PersonNameFormatter.PersonName name) {
            return text;
        }

        @Override
        public boolean isPopulated(PersonNameFormatter.PersonName name) {
            return false;
        }
    }

    /**
     * An actual name field in a NamePattern (i.e., the stuff represented in the pattern by text
     * in braces).  This class actually handles fetching the value for the field out of a
     * PersonName object and applying any modifiers to it.
     */
    private static class NameFieldImpl implements Element {
        private final PersonNameFormatter.NameField fieldID;
        // kept in insertion order so that modifiers are always applied in the order the pattern lists
        // them (a hash-ordered map of enum keys can iterate in a different order from run to run)
        private final Map<PersonNameFormatter.FieldModifier, FieldModifierImpl> modifiers;

        /**
         * @param fieldNameAndModifiers The text between the braces: a field name, optionally followed
         *                              by modifier names, all separated by "-".
         * @param formatterImpl The formatter the pattern belongs to.
         */
        public NameFieldImpl(String fieldNameAndModifiers, PersonNameFormatterImpl formatterImpl) {
            List<PersonNameFormatter.FieldModifier> modifierIDs = new ArrayList<>();
            StringTokenizer tok = new StringTokenizer(fieldNameAndModifiers, "-");

            this.fieldID = PersonNameFormatter.NameField.forString(tok.nextToken());
            while (tok.hasMoreTokens()) {
                modifierIDs.add(PersonNameFormatter.FieldModifier.forString(tok.nextToken()));
            }
            // the SURNAME_ALLCAPS option is implemented as an extra allCaps modifier on the surname field
            if (this.fieldID == PersonNameFormatter.NameField.SURNAME && formatterImpl.shouldCapitalizeSurname()) {
                modifierIDs.add(PersonNameFormatter.FieldModifier.ALL_CAPS);
            }

            this.modifiers = new LinkedHashMap<>();
            for (PersonNameFormatter.FieldModifier modifierID : modifierIDs) {
                this.modifiers.put(modifierID, FieldModifierImpl.forName(modifierID, formatterImpl));
            }
        }

        @Override
        public boolean isLiteral() {
            return false;
        }

        /**
         * Fetches this field's value from `name` and applies to it whichever of the field's
         * modifiers the name object didn't handle itself.
         * @return The field's text, or null if the name doesn't have this field.
         */
        @Override
        public String format(PersonNameFormatter.PersonName name) {
            // getFieldValue() removes the modifiers it handled from the set we pass it, so pass it
            // a copy (ordered the same way as the map)
            Set<PersonNameFormatter.FieldModifier> modifierIDs = new LinkedHashSet<>(modifiers.keySet());
            String result = name.getFieldValue(fieldID, modifierIDs);
            if (result != null) {
                for (PersonNameFormatter.FieldModifier modifierID : modifierIDs) {
                    result = modifiers.get(modifierID).modifyField(result);
                }
            }
            return result;
        }

        @Override
        public boolean isPopulated(PersonNameFormatter.PersonName name) {
            // just check whether the unmodified field contains a value
            Set<PersonNameFormatter.FieldModifier> modifierIDs = new HashSet<>();
            String fieldValue = name.getFieldValue(fieldID, modifierIDs);
            return fieldValue != null && !fieldValue.isEmpty();
        }
    }
}

View file

@ -0,0 +1,370 @@
// © 2022 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package com.ibm.icu.text;
import com.ibm.icu.impl.personname.PersonNameFormatterImpl;
import com.ibm.icu.util.ULocale;
import java.util.Set;
/**
* A class for formatting names of people. Takes raw name data for a person and renders it into a string according to
* the caller's specifications, taking into account how people's names are rendered in the caller's locale.
*
* The Length, Usage, and Formality options can be used to get a wide variety of results. In English, they would
* produce results along these lines:
*
* | | REFERRING | REFERRING | ADDRESSING | ADDRESSING | MONOGRAM | MONOGRAM |
* | | FORMAL | INFORMAL | FORMAL | INFORMAL | FORMAL | INFORMAL |
* |--------|-----------------------|--------------|------------|------------|----------|----------|
* | LONG | James Earl Carter Jr. | Jimmy Carter | Mr. Carter | Jimmy | JEC | JC |
* | MEDIUM | James E. Carter Jr. | Jimmy Carter | Mr. Carter | Jimmy | C | J |
* | SHORT | J. E. Carter | Jimmy Carter | Mr. Carter | Jimmy | C | J |
*
* @internal
*/
public class PersonNameFormatter {
//==============================================================================
// Parameters that control formatting behavior
/**
* Specifies the desired length of the formatted name.
* @internal
*/
public enum Length {
/**
* The longest name length. Generally uses most of the fields in the name object.
* @internal
*/
LONG,
/**
* The most typical name length. Generally includes the given name and surname, but generally
* not most of the other fields.
* @internal
*/
MEDIUM,
/**
* A shortened name. Skips most fields and may abbreviate some name fields to just their initials.
* When Formality is INFORMAL, may only include one field.
* @internal
*/
SHORT
}
/**
* Specifies the intended usage of the formatted name.
* @internal
*/
public enum Usage {
/**
* Used for when the name is going to be used to address the user directly: "Turn left here, John."
* When the SORTING option is also specified, the implementation treats this the same as REFERRING.
* @internal
*/
ADDRESSING,
/**
* Used in general cases, when the name is used to refer to somebody else.
* @internal
*/
REFERRING,
/**
* Used to generate monograms, short 1 to 3-character versions of the name suitable for use in things
* like chat avatars. In English, this is usually the person's initials, but this isn't true in all
* languages. When the caller specifies Usage.MONOGRAM, the Length parameter can be used to get different
* lengths of monograms: Length.SHORT is generally a single letter; Length.LONG may be as many as three or four.
* The SORTING option isn't supported with this usage; the implementation ignores it if it's specified.
* @internal
*/
MONOGRAM
}
/**
* Specifies the intended formality of the formatted name. Together with Length and Usage, this
* selects which of the locale's name patterns the formatter uses.
* @internal
*/
public enum Formality {
/**
* The more formal version of the name.
* @internal
*/
FORMAL,
/**
* The more informal version of the name. In English, this might omit fields or use the "informal" variant
* of the given name.
* @internal
*/
INFORMAL
}
/**
* Additional options to customize the behavior of the formatter. These are passed to the
* PersonNameFormatter constructor as a set, so they can be combined.
* @internal
*/
public enum Options {
/**
* Causes the formatter to generate results suitable for inclusion in a sorted list. For GN-first languages,
* this generally means moving the surname to the beginning of the string, with a comma between it and
* the rest of the name: e.g., "Carter, James E. Jr.".
* The implementation ignores this option when the usage is MONOGRAM, and otherwise treats the
* usage as REFERRING whenever this option is specified.
* @internal
*/
SORTING,
/**
* Requests that the surname in the formatted result be rendered in ALL CAPS. This is often done with
* Japanese names to highlight which name is the surname.
* The implementation does this by applying the "allCaps" modifier to the "surname" field of each pattern.
* @internal
*/
SURNAME_ALLCAPS
}
//==============================================================================
// Identifiers used to request field values from the PersonName object
/**
 * The set of name fields a PersonName object can be asked for.  Each constant carries the
 * string by which the field is referred to in a name pattern.
 * @internal
 */
public enum NameField {
    /**
     * Titles and other words that come before the actual name, such as "Mr."
     * @internal
     */
    PREFIX("prefix"),

    /**
     * The given name.  May be made up of more than one token.
     * @internal
     */
    GIVEN("given"),

    /**
     * Any additional given names.  (In English, this is usually the "middle name";
     * it may be made up of more than one word.)
     * @internal
     */
    GIVEN2("given2"),

    /**
     * The surname.  In Spanish, this is the patronymic surname.
     * @internal
     */
    SURNAME("surname"),

    /**
     * Any additional surnames.  Only a few languages use this; in Spanish, for example,
     * it's the matronymic surname.  (In most languages, multiple surnames all just go
     * in the SURNAME field.)
     * @internal
     */
    SURNAME2("surname2"),

    /**
     * Generational and professional qualifiers that generally come after the actual name,
     * such as "Jr." or "M.D."
     * @internal
     */
    SUFFIX("suffix"),

    /**
     * The preferred field order for the name.  A PersonName object generally shouldn't
     * supply this field; leaving it out lets the PersonNameFormatter work out the proper
     * field order from the locales of the name and the formatter.  It exists to force a
     * particular field order in cases where the formatter's deduction would guess wrong.
     * When supplied, the only valid values are "givenFirst" and "surnameFirst".
     * @internal
     */
    PREFERRED_ORDER("preferredOrder");

    private final String displayName;

    NameField(String displayName) {
        this.displayName = displayName;
    }

    /**
     * Returns the display name of this NameField.
     * @internal
     */
    @Override
    public String toString() {
        return displayName;
    }

    /**
     * Looks up a NameField by its display name.
     * @param name The display name to look up (the match is exact and case-sensitive).
     * @return The NameField whose display name is `name`.
     * @throws IllegalArgumentException if no NameField has that display name.
     * @internal
     */
    public static NameField forString(String name) {
        NameField[] candidates = values();
        for (int i = 0; i < candidates.length; i++) {
            NameField candidate = candidates[i];
            if (candidate.displayName.equals(name)) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("Invalid field name " + name);
    }
}
/**
 * The set of field modifiers understood by the PersonName and PersonNameFormatter objects.
 * Each constant carries the string by which the modifier is referred to in a name pattern.
 * @internal
 */
public enum FieldModifier {
    /**
     * Asks for an "informal" variant of the field, generally a nickname of some type:
     * if "given" is "James", "given-informal" might be "Jimmy".  Only applied to the "given"
     * field.  If the PersonName object doesn't apply this modifier, PersonNameFormatter just
     * uses the unmodified version of "given".
     * @internal
     */
    INFORMAL("informal"),

    /**
     * For a field made up of a main word with one or more separate prefixes, such as
     * "van den Hul", asks for just the prefixes ("van den").  Only applied to the "surname"
     * field.  If the PersonName object doesn't apply this modifier, PersonNameFormatter
     * assumes there are no prefixes.
     * @internal
     */
    PREFIX("prefix"),

    /**
     * For a field made up of a main word with one or more separate prefixes, such as
     * "van den Hul", asks for just the main word ("Hul").  Only applied to the "surname"
     * field.  If the implementing class doesn't apply this modifier, PersonNameFormatter
     * assumes the entire "surname" field is the "core".
     * @internal
     */
    CORE("core"),

    /**
     * Asks for an initial for the specified field.  PersonNameFormatter will do
     * this algorithmically, but a PersonName object can apply this modifier itself if it wants
     * different initial-generation logic (or stores the initial separately).
     * @internal
     */
    INITIAL("initial"),

    /**
     * Asks for an initial for the specified field, suitable for use in a monogram
     * (this usually differs from "initial" in that "initial" adds a period and "monogram" doesn't).
     * PersonNameFormatter will do this algorithmically, but a PersonName object can apply
     * this modifier itself if it wants different monogram-generation logic.
     * @internal
     */
    MONOGRAM("monogram"),

    /**
     * Asks for the field value converted to ALL CAPS.  PersonName objects
     * generally won't need to handle this modifier themselves.
     * @internal
     */
    ALL_CAPS("allCaps"),

    /**
     * Asks for the field value with the first letter of each word capitalized.
     * A PersonName object might handle this modifier itself to capitalize words more
     * selectively.
     * @internal
     */
    INITIAL_CAP("initialCap");

    private final String displayName;

    FieldModifier(String displayName) {
        this.displayName = displayName;
    }

    /**
     * Returns the display name of this FieldModifier.
     * @internal
     */
    @Override
    public String toString() {
        return displayName;
    }

    /**
     * Looks up a FieldModifier by its display name.
     * @param name The display name to look up (the match is exact and case-sensitive).
     * @return The FieldModifier whose display name is `name`.
     * @throws IllegalArgumentException if no FieldModifier has that display name.
     * @internal
     */
    public static FieldModifier forString(String name) {
        FieldModifier[] candidates = values();
        for (int i = 0; i < candidates.length; i++) {
            FieldModifier candidate = candidates[i];
            if (candidate.displayName.equals(name)) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("Invalid modifier name " + name);
    }
}
//==============================================================================
// The PersonName object
/**
 * The source of name data for a PersonNameFormatter.
 * Implementing this interface lets a client hand the formatter a view onto whatever
 * subsystem already holds the name data (rather than copying it into a separate object
 * purely for formatting), and also lets the client take over handling of individual
 * field modifiers instead of relying on the formatter's default modifier behavior.
 * SimplePersonName is a ready-made implementation that stores the field values directly.
 * @internal
 * @see SimplePersonName
 */
public interface PersonName {
    /**
     * Returns the locale of the name-- the language or country of origin of the
     * person being named.
     * @return The name's locale.
     * @internal
     */
    ULocale getNameLocale();

    /**
     * Returns a single field of the name, possibly with modifiers already applied.
     * @param identifier Which field is being requested.
     * @param modifiers An **IN/OUT** parameter. On entry, the modifiers the caller wants applied
     *                  to the basic field value. The implementation may handle or ignore any of
     *                  them, and should update this set so that on exit it holds only the
     *                  requested modifiers that it DIDN'T handle.
     * @return The requested field's value, with some, all, or none of the requested modifiers
     *         applied, or null if the name doesn't contain the requested field.
     * @internal
     */
    String getFieldValue(NameField identifier, Set<FieldModifier> modifiers);
}
private final PersonNameFormatterImpl impl;
//==============================================================================
// Public API on PersonNameFormatter
/**
* Constructs a PersonNameFormatter. All of the parameters are passed through unchanged
* to a new PersonNameFormatterImpl, which this object keeps and uses for formatting.
* @param locale The target locale for formatted names.
* @param length The requested length.
* @param usage The requested usage.
* @param formality The requested formality.
* @param options A set containing additional formatting options. May be null.
* @see Length
* @see Usage
* @see Formality
* @see Options
* @internal
*/
public PersonNameFormatter(ULocale locale, Length length, Usage usage, Formality formality, Set<Options> options) {
this.impl = new PersonNameFormatterImpl(locale, length, usage, formality, options);
}
/**
* Formats a name. The work is delegated to the PersonNameFormatterImpl created by
* this formatter's constructor.
* @param name A PersonName object that supplies individual field values (optionally, with modifiers applied)
* to the formatter for formatting.
* @return The name, formatted according to the locale and other parameters passed to the formatter's constructor.
* @internal
*/
public String format(PersonName name) {
// TODO: Should probably return a FormattedPersonName object
return impl.format(name);
}
}

View file

@ -0,0 +1,163 @@
// © 2022 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package com.ibm.icu.text;
import com.ibm.icu.util.ULocale;
import java.util.*;
/**
* A concrete implementation of PersonNameFormatter.PersonName that simply stores the field
* values in a Map.
*
* A caller can store both raw field values (such as "given") and modified field values (such as "given-informal")
* in a SimplePersonName. But beyond storing and returning modified field values provided to it by the caller,
* SimplePersonName relies on the PersonNameFormatter's default handling of field modifiers.
* @internal
*/
public class SimplePersonName implements PersonNameFormatter.PersonName {
    /**
     * Simple constructor.
     * @param nameLocale The locale of the name (i.e., its ethnic or national origin).
     * @param fieldValues A Map mapping from field names to field values. The field names
     *                    are the values returned by NameField.toString(), optionally followed
     *                    by hyphen-delimited modifier names (e.g. "given-informal"). The map
     *                    is copied, so later changes to it don't affect this object.
     * @internal
     */
    public SimplePersonName(ULocale nameLocale, Map<String, String> fieldValues) {
        this.nameLocale = nameLocale;
        this.fieldValues = new HashMap<>(fieldValues);
    }

    /**
     * A constructor that takes the locale ID and field values as a single String.  This constructor is really
     * intended only for the use of the PersonNameFormatter unit tests.
     * @param keysAndValues A single string containing the locale ID and field values.  This string is organized
     *                      into key-value pairs separated by commas.  The keys are separated from the values
     *                      by equal signs.  The keys themselves are field names, as returned by
     *                      NameField.toString(), optionally followed by a hyphen-delimited set of modifier names,
     *                      as returned by FieldModifier.toString().  The special key "locale" supplies the
     *                      name's locale ID; if it is absent, getNameLocale() returns null.
     * @throws IllegalArgumentException if one of the comma-separated entries contains no equal sign.
     * @internal
     */
    public SimplePersonName(String keysAndValues) {
        this.fieldValues = new HashMap<>();

        StringTokenizer tok = new StringTokenizer(keysAndValues, ",");
        ULocale tempLocale = null;
        while (tok.hasMoreTokens()) {
            String entry = tok.nextToken();
            int equalPos = entry.indexOf('=');
            if (equalPos < 0) {
                throw new IllegalArgumentException("No = found in name field entry");
            }
            String fieldName = entry.substring(0, equalPos);
            String fieldValue = entry.substring(equalPos + 1);

            if (fieldName.equals("locale")) {
                tempLocale = new ULocale(fieldValue);
            } else {
                this.fieldValues.put(fieldName, fieldValue);
            }
        }
        this.nameLocale = tempLocale;

        // special-case code for the "surname" field-- if it isn't specified, but "surname-prefix" and
        // "surname-core" both are, let "surname" be the other two fields joined with a space
        if (this.fieldValues.get("surname") == null) {
            String surnamePrefix = this.fieldValues.get("surname-prefix");
            String surnameCore = this.fieldValues.get("surname-core");
            if (surnamePrefix != null && surnameCore != null) {
                this.fieldValues.put("surname", surnamePrefix + " " + surnameCore);
            }
        }
    }

    /**
     * Returns the locale of the name-- that is, the language or country of origin for the person being named.
     * @return The name's locale.
     * @internal
     */
    @Override
    public ULocale getNameLocale() {
        return nameLocale;
    }

    /**
     * Returns one field of the name, possibly in a modified form.  This class can store modified versions of fields,
     * provided at construction time, and this function will return them.  Otherwise, it ignores modifiers and
     * relies on PersonNameFormat's default modifier handling.
     *
     * Lookup proceeds in three steps: an exact match on the field name plus all requested modifiers; failing that,
     * the unmodified field (which must exist for anything to be returned); and, when two or more modifiers were
     * requested, the stored variant of this same field that covers the largest subset of the requested modifiers.
     * @param nameField The identifier of the requested field.
     * @param modifiers An **IN/OUT** parameter that specifies modifiers to apply to the basic field value.
     *                  On return, this list will contain any modifiers that this object didn't handle.  This class
     *                  will always return this set unmodified, unless a modified version of the requested field
     *                  was provided at construction time.
     * @return The value of the requested field, optionally modified by some or all of the requested modifiers, or
     * null if the requested field isn't present in the name.
     * @internal
     */
    @Override
    public String getFieldValue(PersonNameFormatter.NameField nameField, Set<PersonNameFormatter.FieldModifier> modifiers) {
        // first look for the fully modified name in the internal table
        String fieldName = nameField.toString();
        String result = fieldValues.get(makeModifiedFieldName(nameField, modifiers));
        if (result != null) {
            modifiers.clear();
            return result;
        }

        // if we don't find it, check the fully unmodified name.  If it's not there, nothing else will be
        result = fieldValues.get(fieldName);
        if (result == null) {
            return null;
        } else if (modifiers.size() == 1) {
            // and if it IS there and there's only one modifier, we're done
            return result;
        }

        // but if there are two or more modifiers, then we have to go through the whole list of fields and look for the best match
        String winningKey = fieldName;
        Set<PersonNameFormatter.FieldModifier> winningModifiers = new HashSet<>();
        for (String key : fieldValues.keySet()) {
            // only consider variants of THIS field: a bare prefix test would let "given" pick up
            // "given2-initial", or "surname" pick up "surname2", and return another field's value
            if (!isKeyForField(key, fieldName)) {
                continue;
            }
            Set<PersonNameFormatter.FieldModifier> keyModifiers = makeModifiersFromName(key);
            if (!modifiers.containsAll(keyModifiers)) {
                continue;
            }
            // prefer the variant covering the most requested modifiers; break ties by key order
            // so the outcome doesn't depend on the map's iteration order
            int score = keyModifiers.size();
            int winningScore = winningModifiers.size();
            if (score > winningScore || (score == winningScore && key.compareTo(winningKey) < 0)) {
                winningKey = key;
                winningModifiers = keyModifiers;
            }
        }
        result = fieldValues.get(winningKey);
        modifiers.removeAll(winningModifiers);
        return result;
    }

    /**
     * Returns true if the given map key names the specified field, either bare ("surname") or
     * followed by hyphen-delimited modifiers ("surname-core").  Keys of other fields that merely
     * share a prefix with the field name ("surname2") don't qualify.
     */
    private static boolean isKeyForField(String key, String fieldName) {
        if (!key.startsWith(fieldName)) {
            return false;
        }
        return key.length() == fieldName.length() || key.charAt(fieldName.length()) == '-';
    }

    /**
     * Builds the map key for a field with a set of modifiers: the field name followed by
     * "-" and each modifier name, with the modifier names in sorted order.
     */
    private static String makeModifiedFieldName(PersonNameFormatter.NameField fieldName,
                                                Collection<PersonNameFormatter.FieldModifier> modifiers) {
        StringBuilder result = new StringBuilder();
        result.append(fieldName);

        TreeSet<String> sortedModifierNames = new TreeSet<>();
        for (PersonNameFormatter.FieldModifier modifier : modifiers) {
            sortedModifierNames.add(modifier.toString());
        }
        for (String modifierName : sortedModifierNames) {
            result.append("-");
            result.append(modifierName);
        }
        return result.toString();
    }

    /**
     * Parses the modifiers out of a map key of the form "field-modifier-modifier...".
     * FieldModifier.forString() throws IllegalArgumentException for an unrecognized modifier name.
     */
    private static Set<PersonNameFormatter.FieldModifier> makeModifiersFromName(String modifiedName) {
        StringTokenizer tok = new StringTokenizer(modifiedName, "-");
        Set<PersonNameFormatter.FieldModifier> result = new HashSet<>();
        tok.nextToken(); // throw away the field name
        while (tok.hasMoreTokens()) {
            result.add(PersonNameFormatter.FieldModifier.forString(tok.nextToken()));
        }
        return result;
    }

    private final ULocale nameLocale;
    private final Map<String, String> fieldValues;
}

View file

@ -0,0 +1,341 @@
// © 2022 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package com.ibm.icu.dev.test.format;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.text.PersonNameFormatter;
import com.ibm.icu.text.SimplePersonName;
import com.ibm.icu.util.ULocale;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import java.util.*;
@RunWith(JUnit4.class)
public class PersonNameFormatterTest extends TestFmwk{
/**
 * Pairs one name with the formatter test cases to run against it.
 * nameFields is the key=value string handed to the SimplePersonName(String) constructor.
 * Each row of testCases is read by executeTestCases() as
 * { formatter locale, length, usage, formality, comma-separated options, expected result }.
 */
private static class NameAndTestCases {
    // set once in the constructor and only read afterwards
    public final String nameFields;
    public final String[][] testCases;

    public NameAndTestCases(String nameFields, String[][] testCases) {
        this.nameFields = nameFields;
        this.testCases = testCases;
    }
}
/**
 * Runs every test case of every entry: builds a SimplePersonName from the entry's field string,
 * builds a PersonNameFormatter from each test-case row, formats the name, and compares the
 * result with the row's expected value.
 * @param namesAndTestCases The names and, for each, the rows
 *        { locale, length, usage, formality, options, expected result } to check.
 * @param forDebugging If true, the inputs and actual results are printed to System.out
 *        and nothing is asserted.
 */
private void executeTestCases(NameAndTestCases[] namesAndTestCases, boolean forDebugging) {
    for (int i = 0; i < namesAndTestCases.length; i++) {
        NameAndTestCases entry = namesAndTestCases[i];
        SimplePersonName name = new SimplePersonName(entry.nameFields);
        if (forDebugging) {
            System.out.println(entry.nameFields);
        }

        for (String[] row : entry.testCases) {
            // unpack the row in column order
            ULocale locale = new ULocale(row[0]);
            PersonNameFormatter.Length length = PersonNameFormatter.Length.valueOf(row[1]);
            PersonNameFormatter.Usage usage = PersonNameFormatter.Usage.valueOf(row[2]);
            PersonNameFormatter.Formality formality = PersonNameFormatter.Formality.valueOf(row[3]);
            Set<PersonNameFormatter.Options> options = makeOptionsSet(row[4]);
            String expected = row[5];

            String actual = new PersonNameFormatter(locale, length, usage, formality, options).format(name);

            if (!forDebugging) {
                assertEquals("Wrong formatting result for " + entry.nameFields + "," + Arrays.toString(row), expected, actual);
                continue;
            }
            System.out.println(" " + locale + "," + length + "," + usage + "," + formality + "," + options + " => " + actual);
        }
    }
}
/**
 * Converts a comma-separated list of PersonNameFormatter.Options constant names into a set.
 * An empty string produces an empty set.
 * @param optionsStr The option names, separated by commas.
 * @return A new set containing the named options.
 */
private static Set<PersonNameFormatter.Options> makeOptionsSet(String optionsStr) {
    Set<PersonNameFormatter.Options> options = new HashSet<>();
    for (StringTokenizer names = new StringTokenizer(optionsStr, ","); names.hasMoreTokens(); ) {
        options.add(PersonNameFormatter.Options.valueOf(names.nextToken()));
    }
    return options;
}
/**
* Formats a single en_US name (prefix, given, given-informal, given2 and surname fields) with the
* en_US formatter across the combinations of length, usage and formality, with and without
* the SORTING option. Each row is { locale, length, usage, formality, options, expected result }.
*/
@Test
public void TestEnglishName() {
executeTestCases(new NameAndTestCases[]{
new NameAndTestCases("locale=en_US,prefix=Mr.,given=Richard,given-informal=Rich,given2=Theodore,surname=Gillam", new String[][] {
// test all the different combinations of parameters with the normal name order
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "Richard Theodore Gillam" },
{ "en_US", "LONG", "REFERRING", "INFORMAL", "", "Rich Gillam" },
{ "en_US", "LONG", "ADDRESSING", "FORMAL", "", "Mr. Gillam" },
{ "en_US", "LONG", "ADDRESSING", "INFORMAL", "", "Rich" },
{ "en_US", "MEDIUM", "REFERRING", "FORMAL", "", "Richard T. Gillam" },
{ "en_US", "MEDIUM", "REFERRING", "INFORMAL", "", "Rich Gillam" },
{ "en_US", "MEDIUM", "ADDRESSING", "FORMAL", "", "Mr. Gillam" },
{ "en_US", "MEDIUM", "ADDRESSING", "INFORMAL", "", "Rich" },
{ "en_US", "SHORT", "REFERRING", "FORMAL", "", "R. T. Gillam" },
{ "en_US", "SHORT", "REFERRING", "INFORMAL", "", "Rich G." },
{ "en_US", "SHORT", "ADDRESSING", "FORMAL", "", "Mr. Gillam" },
{ "en_US", "SHORT", "ADDRESSING", "INFORMAL", "", "Rich" },
// test all the different combinations of parameters for "sorting" order
{ "en_US", "LONG", "REFERRING", "FORMAL", "SORTING", "Gillam, Richard Theodore" },
{ "en_US", "LONG", "REFERRING", "INFORMAL", "SORTING", "Gillam, Rich" },
{ "en_US", "MEDIUM", "REFERRING", "FORMAL", "SORTING", "Gillam, Richard T." },
{ "en_US", "MEDIUM", "REFERRING", "INFORMAL", "SORTING", "Gillam, Rich" },
{ "en_US", "SHORT", "REFERRING", "FORMAL", "SORTING", "Gillam, R. T." },
{ "en_US", "SHORT", "REFERRING", "INFORMAL", "SORTING", "Gillam, Rich" },
// we don't really support ADDRESSING in conjunction with SORTING-- it should always
// do the same thing as REFERRING
{ "en_US", "LONG", "ADDRESSING", "FORMAL", "SORTING", "Gillam, Richard Theodore" },
{ "en_US", "LONG", "ADDRESSING", "INFORMAL", "SORTING", "Gillam, Rich" },
{ "en_US", "MEDIUM", "ADDRESSING", "FORMAL", "SORTING", "Gillam, Richard T." },
{ "en_US", "MEDIUM", "ADDRESSING", "INFORMAL", "SORTING", "Gillam, Rich" },
{ "en_US", "SHORT", "ADDRESSING", "FORMAL", "SORTING", "Gillam, R. T." },
{ "en_US", "SHORT", "ADDRESSING", "INFORMAL", "SORTING", "Gillam, Rich" },
// finally, try the different variations of MONOGRAM
{ "en_US", "LONG", "MONOGRAM", "FORMAL", "", "RTG" },
{ "en_US", "LONG", "MONOGRAM", "INFORMAL", "", "RG" },
{ "en_US", "MEDIUM", "MONOGRAM", "FORMAL", "", "G" },
{ "en_US", "MEDIUM", "MONOGRAM", "INFORMAL", "", "R" },
{ "en_US", "SHORT", "MONOGRAM", "FORMAL", "", "G" },
{ "en_US", "SHORT", "MONOGRAM", "INFORMAL", "", "R" },
// and again, we don't support SORTING for monograms, so it should produce the
// same results as the MONOGRAM cases without SORTING just above
{ "en_US", "LONG", "MONOGRAM", "FORMAL", "SORTING", "RTG" },
{ "en_US", "LONG", "MONOGRAM", "INFORMAL", "SORTING", "RG" },
{ "en_US", "MEDIUM", "MONOGRAM", "FORMAL", "SORTING", "G" },
{ "en_US", "MEDIUM", "MONOGRAM", "INFORMAL", "SORTING", "R" },
{ "en_US", "SHORT", "MONOGRAM", "FORMAL", "SORTING", "G" },
{ "en_US", "SHORT", "MONOGRAM", "INFORMAL", "SORTING", "R" },
})
}, false);
}
/**
* Exercises names whose surname is supplied as separate "surname-prefix" and "surname-core"
* fields, as a single "surname" field, and with explicit "surname-initial" /
* "surname-monogram" overrides, using the en_US and nl_NL formatters.
* Each row is { locale, length, usage, formality, options, expected result }.
*/
@Test
public void TestPrefixCore() {
executeTestCases(new NameAndTestCases[]{
new NameAndTestCases("locale=en_US,given=Willem,surname-prefix=van der,surname-core=Plas", new String[][] {
// for normal formatting, the {surname} field is just "{surname-prefix} {surname-core}"
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "Willem van der Plas" },
{ "en_US", "LONG", "REFERRING", "INFORMAL", "", "Willem van der Plas" },
{ "en_US", "MEDIUM", "REFERRING", "FORMAL", "", "Willem van der Plas" },
{ "en_US", "MEDIUM", "REFERRING", "INFORMAL", "", "Willem van der Plas" },
{ "en_US", "SHORT", "REFERRING", "FORMAL", "", "W. van der Plas" },
// for FORMAL SORTING, we sort by "surname-core", with "surname-prefix" at the end
{ "en_US", "LONG", "REFERRING", "FORMAL", "SORTING", "Plas, Willem van der" },
{ "en_US", "MEDIUM", "REFERRING", "FORMAL", "SORTING", "Plas, Willem van der" },
{ "en_US", "SHORT", "REFERRING", "FORMAL", "SORTING", "Plas, W. van der" },
// but for INFORMAL SORTING, we keep the surname together and sort by the prefix
{ "en_US", "LONG", "REFERRING", "INFORMAL", "SORTING", "van der Plas, Willem" },
{ "en_US", "MEDIUM", "REFERRING", "INFORMAL", "SORTING", "van der Plas, Willem" },
{ "en_US", "SHORT", "REFERRING", "INFORMAL", "SORTING", "van der Plas, Willem" },
// the default (English) logic for initials doesn't do anything special with the surname-prefix--
// it gets initials too, which is probably wrong
{ "en_US", "SHORT", "REFERRING", "INFORMAL", "", "Willem v. d. P." },
// and (English) monogram generation doesn't do anything special with the prefix either
{ "en_US", "LONG", "MONOGRAM", "FORMAL", "", "WV" },
{ "en_US", "LONG", "MONOGRAM", "INFORMAL", "", "WV" },
// but Dutch monogram generation _does_ handle the prefix specially
{ "nl_NL", "LONG", "MONOGRAM", "FORMAL", "", "WvP" },
{ "nl_NL", "LONG", "MONOGRAM", "INFORMAL", "", "WvP" },
}),
new NameAndTestCases("locale=en_US,given=Willem,surname=van der Plas", new String[][] {
// if we just use the "surname" field instead of "surname-prefix" and "surname-core", everything's
// the same, except (obviously) for the cases where we were doing something special with the
// prefix and core
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "Willem van der Plas" },
{ "en_US", "SHORT", "REFERRING", "FORMAL", "", "W. van der Plas" },
// for example, SORTING works the same way regardless of formality
{ "en_US", "LONG", "REFERRING", "FORMAL", "SORTING", "van der Plas, Willem" },
{ "en_US", "MEDIUM", "REFERRING", "FORMAL", "SORTING", "van der Plas, Willem" },
{ "en_US", "SHORT", "REFERRING", "FORMAL", "SORTING", "van der Plas, W." },
{ "en_US", "LONG", "REFERRING", "INFORMAL", "SORTING", "van der Plas, Willem" },
{ "en_US", "MEDIUM", "REFERRING", "INFORMAL", "SORTING", "van der Plas, Willem" },
{ "en_US", "SHORT", "REFERRING", "INFORMAL", "SORTING", "van der Plas, Willem" },
// and monogram generation works the same in English and Dutch
{ "en_US", "LONG", "MONOGRAM", "FORMAL", "", "WV" },
{ "en_US", "LONG", "MONOGRAM", "INFORMAL", "", "WV" },
{ "nl_NL", "LONG", "MONOGRAM", "FORMAL", "", "WV" },
{ "nl_NL", "LONG", "MONOGRAM", "INFORMAL", "", "WV" },
}),
new NameAndTestCases("locale=en_US,given=Willem,surname-prefix=van der,surname-core=Plas,surname-initial=vdP.,surname-monogram=vdP", new String[][] {
// we can work around the initial generation by providing a "surname-initial" field in the name object
{ "en_US", "SHORT", "REFERRING", "INFORMAL", "", "Willem vdP." },
// we could also (theoretically) work around the monogram-generation problem in English in the same way
{ "en_US", "LONG", "MONOGRAM", "FORMAL", "", "WVDP" },
{ "en_US", "LONG", "MONOGRAM", "INFORMAL", "", "WVDP" },
}),
}, false);
}
/**
* Checks the initials and monograms the en_US formatter produces for fields that
* contain more than one word (a two-word given2, a two-word surname, and a two-word given
* combined with a three-word given2).
* Each row is { locale, length, usage, formality, options, expected result }.
*/
@Test
public void TestInitialGeneration() {
executeTestCases(new NameAndTestCases[]{
new NameAndTestCases("locale=en_US,given=George,given2=Herbert Walker,surname=Bush", new String[][] {
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "George Herbert Walker Bush" },
{ "en_US", "MEDIUM", "REFERRING", "FORMAL", "", "George H. W. Bush" },
{ "en_US", "SHORT", "REFERRING", "FORMAL", "", "G. H. W. Bush" },
{ "en_US", "SHORT", "REFERRING", "INFORMAL", "", "George B." },
{ "en_US", "LONG", "MONOGRAM", "FORMAL", "", "GHB" },
{ "en_US", "LONG", "MONOGRAM", "INFORMAL", "", "GB" },
}),
new NameAndTestCases("locale=en_US,given=Ralph,surname=Vaughan Williams", new String[][] {
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "Ralph Vaughan Williams" },
{ "en_US", "MEDIUM", "REFERRING", "FORMAL", "", "Ralph Vaughan Williams" },
{ "en_US", "SHORT", "REFERRING", "FORMAL", "", "R. Vaughan Williams" },
{ "en_US", "SHORT", "REFERRING", "INFORMAL", "", "Ralph V. W." },
{ "en_US", "LONG", "MONOGRAM", "FORMAL", "", "RV" },
{ "en_US", "LONG", "MONOGRAM", "INFORMAL", "", "RV" },
}),
new NameAndTestCases("locale=en_US,given=John Paul,given2=Stephen David George,surname=Smith", new String[][] {
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "John Paul Stephen David George Smith" },
{ "en_US", "MEDIUM", "REFERRING", "FORMAL", "", "John Paul S. D. G. Smith" },
{ "en_US", "SHORT", "REFERRING", "FORMAL", "", "J. P. S. D. G. Smith" },
{ "en_US", "SHORT", "REFERRING", "INFORMAL", "", "John Paul S." },
{ "en_US", "LONG", "MONOGRAM", "FORMAL", "", "JSS" },
{ "en_US", "LONG", "MONOGRAM", "INFORMAL", "", "JS" },
}),
}, false);
}
/**
* Formats names that are missing various fields (suffix, given, given2, surname) with the
* en_US LONG/REFERRING/FORMAL formatter and checks that the result contains only the
* fields that are present, separated by single spaces.
* Each row is { locale, length, usage, formality, options, expected result }.
*/
@Test
public void TestLiteralTextElision() {
executeTestCases(new NameAndTestCases[]{
// literal text elision is difficult to test with the real locale data, although this is a start
// perhaps we could add an API for debugging that lets us pass in real pattern strings, but I'd like to stay away from that
new NameAndTestCases("locale=en_US,given=John,given2=Paul,surname=Smith,suffix=Jr.", new String[][] {
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "John Paul Smith Jr." },
}),
new NameAndTestCases("locale=en_US,given=John,given2=Paul,surname=Smith", new String[][] {
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "John Paul Smith" },
}),
new NameAndTestCases("locale=en_US,given2=Paul,surname=Smith", new String[][] {
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "Paul Smith" },
}),
new NameAndTestCases("locale=en_US,given2=Paul", new String[][] {
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "Paul" },
}),
new NameAndTestCases("locale=en_US,given=John", new String[][] {
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "John" },
}),
new NameAndTestCases("locale=en_US,given=John,suffix=Jr.", new String[][] {
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "John Jr." },
}),
}, false);
}
/**
* Formats an es_ES name with and without a surname2 field, in normal and SORTING order,
* to check that the comma lands in the right place in both cases.
* Each row is { locale, length, usage, formality, options, expected result }.
*/
@Test
public void TestMultiplePatterns() {
executeTestCases(new NameAndTestCases[]{
// the Spanish rules have two name patterns for many of the sorting cases: one to use if the surname2
// field is populated and one to use if not-- these allow the comma between the fields to be displayed
// in the right place. This test checks to make sure we're using the right pattern based on which
// fields are present in the actual name
new NameAndTestCases("locale=es_ES,given=Andrés,given2=Manuel,surname=López,surname2=Obrador", new String[][] {
{ "es_ES", "LONG", "REFERRING", "FORMAL", "", "Andrés Manuel López Obrador" },
{ "es_ES", "LONG", "REFERRING", "FORMAL", "SORTING" , "López Obrador, Andrés Manuel" },
}),
new NameAndTestCases("locale=es_ES,given=Andrés,given2=Manuel,surname=López", new String[][] {
{ "es_ES", "LONG", "REFERRING", "FORMAL", "", "Andrés Manuel López" },
{ "es_ES", "LONG", "REFERRING", "FORMAL", "SORTING" , "López, Andrés Manuel" },
}),
}, false);
}
/**
* Checks how the en_US formatter orders given name and surname depending on the name's own
* locale (en_US vs. ja_JP) and on an explicit "preferredOrder" field in the name.
* Each row is { locale, length, usage, formality, options, expected result }.
*/
@Test
public void TestNameOrder() {
executeTestCases(new NameAndTestCases[]{
// the name's locale is used to determine the field order. For the English name formatter, if the
// name is English, the order is GN first. If it's Japanese, it's SN first. This is true whether the
// Japanese name is written in Latin letters or Han characters
new NameAndTestCases("locale=en_US,given=Shinzo,surname=Abe", new String[][] {
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "Shinzo Abe" },
}),
new NameAndTestCases("locale=ja_JP,given=Shinzo,surname=Abe", new String[][] {
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "Abe Shinzo" },
}),
new NameAndTestCases("locale=ja_JP,given=晋三,surname=安倍", new String[][] {
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "安倍 晋三" },
}),
// the name can also declare its order directly, with the optional "preferredOrder" field. If it does this,
// the value of that field holds for all formatter locales and overrides determining the order
// by looking at the name's locale
new NameAndTestCases("locale=en_US,given=Shinzo,surname=Abe,preferredOrder=surnameFirst", new String[][] {
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "Abe Shinzo" },
}),
new NameAndTestCases("locale=ja_JP,given=Shinzo,surname=Abe,preferredOrder=givenFirst", new String[][] {
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "Shinzo Abe" },
}),
}, false);
}
/**
* Checks the SURNAME_ALLCAPS option with the en_US formatter, for a name in given-first
* order (en_US name locale) and in surname-first order (ja_JP name locale).
* Each row is { locale, length, usage, formality, options, expected result }.
*/
@Test
public void TestCapitalizedSurname() {
executeTestCases(new NameAndTestCases[]{
// the SURNAME_ALLCAPS option does just what it says: it causes the surname field
// to be displayed in all caps
new NameAndTestCases("locale=en_US,given=Shinzo,surname=Abe", new String[][] {
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "Shinzo Abe" },
{ "en_US", "LONG", "REFERRING", "FORMAL", "SURNAME_ALLCAPS", "Shinzo ABE" },
}),
new NameAndTestCases("locale=ja_JP,given=Shinzo,surname=Abe", new String[][] {
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "Abe Shinzo" },
{ "en_US", "LONG", "REFERRING", "FORMAL", "SURNAME_ALLCAPS", "ABE Shinzo" },
}),
}, false);
}
/**
* Checks what separates the name fields (space, nothing, or a replacement character) for
* combinations of formatter locale (en_US, ja_JP, zh_CN), name locale and the script the
* name is written in.
* Each row is { locale, length, usage, formality, options, expected result }.
*/
@Test
public void TestNameSpacing() {
executeTestCases(new NameAndTestCases[]{
// if the formatter locale uses spaces, the result will use its formats (complete with spaces),
// regardless of the name's locale
new NameAndTestCases("locale=ja_JP,given=Hayao,surname=Miyazaki", new String[][] {
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "Miyazaki Hayao" },
}),
new NameAndTestCases("locale=ja_JP,given=駿,surname=宮崎", new String[][] {
{ "en_US", "LONG", "REFERRING", "FORMAL", "", "宮崎 駿" },
}),
// if the formatter locale doesn't use spaces and the name's locale doesn't either, just use
// the native formatter
new NameAndTestCases("locale=ja_JP,given=駿,surname=宮崎", new String[][] {
// (the Japanese name formatter actually inserts a space even for native names)
{ "ja_JP", "LONG", "REFERRING", "FORMAL", "", "宮崎 駿" },
{ "zh_CN", "LONG", "REFERRING", "FORMAL", "", "宮崎駿" },
}),
// if the formatter locale doesn't use spaces and the name's locale does, use the name locale's formatter,
// but if the name is still using the formatter locale's script, use the native formatter's
// "foreign space replacement" character instead of spaces
new NameAndTestCases("locale=en_US,given=Albert,surname=Einstein", new String[][] {
{ "ja_JP", "LONG", "REFERRING", "FORMAL", "", "Albert Einstein" },
{ "zh_CN", "LONG", "REFERRING", "FORMAL", "", "Albert Einstein" },
}),
new NameAndTestCases("locale=en_US,given=アルベルト,surname=アインシュタイン", new String[][] {
{ "ja_JP", "LONG", "REFERRING", "FORMAL", "", "アルベルト・アインシュタイン" },
}),
new NameAndTestCases("locale=en_US,given=阿尔伯特,surname=爱因斯坦", new String[][] {
{ "zh_CN", "LONG", "REFERRING", "FORMAL", "", "阿尔伯特·爱因斯坦" },
}),
}, false);
}
// need tests (and implementation?) for:
// - foreign space replacement
}