ICU-22294 UTS46 transitional=deprecated, change DEFAULT

This commit is contained in:
Markus Scherer 2024-09-06 13:47:14 -07:00
parent 415a7accc5
commit f062f52c12
5 changed files with 217 additions and 138 deletions

View file

@ -70,6 +70,7 @@ public:
* The worker functions use transitional processing, including deviation mappings,
* unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
* is used in which case the deviation characters are passed through without change.
* <b>Unicode 15.1 UTS #46 deprecated transitional processing.</b>
*
* Disallowed characters are mapped to U+FFFD.
*
@ -82,6 +83,8 @@ public:
* letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
*
* @param options Bit set to modify the processing and error checking.
* These should include UIDNA_DEFAULT, or
* UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE.
* See option bit set values in uidna.h.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns

View file

@ -49,11 +49,19 @@
*/
enum {
/**
* Default options value: None of the other options are set.
* Default options value: UTS #46 nontransitional processing.
* For use in static worker and factory methods.
*
* Since ICU 76, this is the same as
* UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE,
* corresponding to Unicode 15.1 UTS #46 deprecating transitional processing.
* (These options are ignored by the IDNA2003 implementation.)
*
* Before ICU 76, this constant did not set any of the options.
*
* @stable ICU 2.6
*/
UIDNA_DEFAULT=0,
UIDNA_DEFAULT=0x30,
#ifndef U_HIDE_DEPRECATED_API
/**
* Option to allow unassigned code points in domain names and labels.
@ -91,19 +99,27 @@ enum {
/**
* IDNA option for nontransitional processing in ToASCII().
* For use in static worker and factory methods.
*
* <p>By default, ToASCII() uses transitional processing.
* Unicode 15.1 UTS #46 deprecated transitional processing.
*
* <p>This option is ignored by the IDNA2003 implementation.
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
* @stable ICU 4.6
* @see UIDNA_DEFAULT
*/
UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
/**
* IDNA option for nontransitional processing in ToUnicode().
* For use in static worker and factory methods.
*
* <p>By default, ToUnicode() uses transitional processing.
* Unicode 15.1 UTS #46 deprecated transitional processing.
*
* <p>This option is ignored by the IDNA2003 implementation.
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
* @stable ICU 4.6
* @see UIDNA_DEFAULT
*/
UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
/**
@ -134,6 +150,8 @@ typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @stable ICU 4.6 */
* For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
*
* @param options Bit set to modify the processing and error checking.
* These should include UIDNA_DEFAULT, or
* UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE.
* See option bit set values in uidna.h.
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns

View file

@ -42,6 +42,7 @@ public:
void TestNotSTD3();
void TestInvalidPunycodeDigits();
void TestACELabelEdgeCases();
void TestDefaultNontransitional();
void TestTooLong();
void TestSomeCases();
void IdnaTest();
@ -88,6 +89,7 @@ void UTS46Test::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
TESTCASE_AUTO(TestNotSTD3);
TESTCASE_AUTO(TestInvalidPunycodeDigits);
TESTCASE_AUTO(TestACELabelEdgeCases);
TESTCASE_AUTO(TestDefaultNontransitional);
TESTCASE_AUTO(TestTooLong);
TESTCASE_AUTO(TestSomeCases);
TESTCASE_AUTO(IdnaTest);
@ -354,6 +356,27 @@ void UTS46Test::TestACELabelEdgeCases() {
}
}
void UTS46Test::TestDefaultNontransitional() {
IcuTestErrorCode errorCode(*this, "TestDefaultNontransitional()");
// Unicode 15.1 UTS #46 deprecated transitional processing.
// ICU 76 changed UIDNA_DEFAULT to set the nontransitional options.
LocalPointer<IDNA> forZero(IDNA::createUTS46Instance(0, errorCode));
LocalPointer<IDNA> forDefault(IDNA::createUTS46Instance(UIDNA_DEFAULT, errorCode));
if(errorCode.isFailure()) {
return;
}
UnicodeString result;
IDNAInfo info;
forZero->labelToUnicode(u"Fⓤßẞ", result, info, errorCode);
assertEquals("forZero.toUnicode(Fⓤßẞ)", u"fussss", result);
forZero->labelToASCII(u"Fⓤßẞ", result, info, errorCode);
assertEquals("forZero.toASCII(Fⓤßẞ)", u"fussss", result);
forDefault->labelToUnicode(u"Fⓤßẞ", result, info, errorCode);
assertEquals("forDefault.toUnicode(Fⓤßẞ)", u"fußß", result);
forDefault->labelToASCII(u"Fⓤßẞ", result, info, errorCode);
assertEquals("forDefault.toASCII(Fⓤßẞ)", u"xn--fu-hiaa", result);
}
void UTS46Test::TestTooLong() {
// ICU-13727: Limit input length for n^2 algorithm
// where well-formed strings are at most 59 characters long.

View file

@ -24,7 +24,7 @@ import com.ibm.icu.impl.UTS46;
* The IDNA class is not intended for public subclassing.
* <p>
* The non-static methods implement UTS #46 and IDNA2008.
* IDNA2008 is implemented according to UTS #46, see getUTS46Instance().
* IDNA2008 is implemented according to UTS #46, see {@link #getUTS46Instance(int)}.
* <p>
* IDNA2003 is obsolete. The static methods implement IDNA2003. They are all deprecated.
* <p>
@ -32,35 +32,43 @@ import com.ibm.icu.impl.UTS46;
* <p>
* The static IDNA API methods implement the IDNA protocol as defined in the
* <a href="http://www.ietf.org/rfc/rfc3490.txt">IDNA RFC</a>.
* The draft defines 2 operations: ToASCII and ToUnicode. Domain labels
* The draft defines 2 operations: ToASCII and ToUnicode. Domain labels
* containing non-ASCII code points are required to be processed by
* ToASCII operation before passing it to resolver libraries. Domain names
* that are obtained from resolver libraries are required to be processed by
* ToUnicode operation before displaying the domain name to the user.
* IDNA requires that implementations process input strings with
* <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a>,
* which is a profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a> ,
* and then with <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a>.
* Implementations of IDNA MUST fully implement Nameprep and Punycode;
* IDNA requires that implementations process input strings with
* <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a>,
* which is a profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a> ,
* and then with <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a>.
* Implementations of IDNA MUST fully implement Nameprep and Punycode;
* neither Nameprep nor Punycode are optional.
* The input and output of ToASCII and ToUnicode operations are Unicode
* The input and output of ToASCII and ToUnicode operations are Unicode
* and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
* multiple times to an input string will yield the same result as applying the operation
* once.
* ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
* ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
* ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string).
*
*
* @author Ram Viswanadha, Markus Scherer
* @stable ICU 2.8
*/
public abstract class IDNA {
/**
* Default options value: None of the other options are set.
/**
* Default options value: UTS #46 nontransitional processing.
* For use in static worker and factory methods.
*
* <p>Since ICU 76, this is the same as
* {@link #NONTRANSITIONAL_TO_ASCII} | {@link #NONTRANSITIONAL_TO_UNICODE},
* corresponding to Unicode 15.1 UTS #46 deprecating transitional processing.
* (These options are ignored by the IDNA2003 implementation.)
*
* <p>Before ICU 76, this constant did not set any of the options.
*
* @stable ICU 2.8
*/
public static final int DEFAULT = 0;
/**
public static final int DEFAULT = 0x30;
/**
* Option to allow unassigned code points in domain names and labels.
* For use in static worker and factory methods.
* <p>This option is ignored by the UTS46 implementation.
@ -69,7 +77,7 @@ public abstract class IDNA {
*/
@Deprecated
public static final int ALLOW_UNASSIGNED = 1;
/**
/**
* Option to check whether the input conforms to the STD3 ASCII rules,
* for example the restriction of labels to LDH characters
* (ASCII Letters, Digits and Hyphen-Minus).
@ -96,7 +104,10 @@ public abstract class IDNA {
/**
* IDNA option for nontransitional processing in ToASCII().
* For use in static worker and factory methods.
*
* <p>By default, ToASCII() uses transitional processing.
* Unicode 15.1 UTS #46 deprecated transitional processing.
*
* <p>This option is ignored by the IDNA2003 implementation.
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
* @stable ICU 4.6
@ -105,7 +116,10 @@ public abstract class IDNA {
/**
* IDNA option for nontransitional processing in ToUnicode().
* For use in static worker and factory methods.
*
* <p>By default, ToUnicode() uses transitional processing.
* Unicode 15.1 UTS #46 deprecated transitional processing.
*
* <p>This option is ignored by the IDNA2003 implementation.
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
* @stable ICU 4.6
@ -133,8 +147,9 @@ public abstract class IDNA {
* IDNA2003 and IDNA2008.
* <p>
* The worker functions use transitional processing, including deviation mappings,
* unless NONTRANSITIONAL_TO_ASCII or NONTRANSITIONAL_TO_UNICODE
* unless {@link #NONTRANSITIONAL_TO_ASCII} or {@link #NONTRANSITIONAL_TO_UNICODE}
* is used in which case the deviation characters are passed through without change.
* <b>Unicode 15.1 UTS #46 deprecated transitional processing.</b>
* <p>
* Disallowed characters are mapped to U+FFFD.
* <p>
@ -146,6 +161,8 @@ public abstract class IDNA {
* letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
*
* @param options Bit set to modify the processing and error checking.
* These should include {@link IDNA#DEFAULT}, or
* {@link IDNA#NONTRANSITIONAL_TO_ASCII} | {@link IDNA#NONTRANSITIONAL_TO_UNICODE}.
* @return the UTS #46 IDNA instance, if successful
* @stable ICU 4.6
*/
@ -474,22 +491,22 @@ public abstract class IDNA {
* IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
* This operation is done on <b>single labels</b> before sending it to something that expects
* ASCII names. A label is an individual part of a domain name. Labels are usually
* separated by dots; e.g., "www.example.com" is composed of 3 labels
* separated by dots; e.g., "www.example.com" is composed of 3 labels
* "www","example", and "com".
*
* @param src The input string to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* If unassigned code points are found the operation fails with
* StringPrepParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points in the input
* If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
*
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @throws StringPrepParseException When an error occurs for parsing a string.
@ -501,27 +518,27 @@ public abstract class IDNA {
UCharacterIterator iter = UCharacterIterator.getInstance(src);
return convertToASCII(iter,options);
}
/**
* IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
* This operation is done on <b>single labels</b> before sending it to something that expects
* ASCII names. A label is an individual part of a domain name. Labels are usually
* separated by dots; e.g., "www.example.com" is composed of 3 labels
* separated by dots; e.g., "www.example.com" is composed of 3 labels
* "www","example", and "com".
*
* @param src The input string as StringBuffer to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points in the input
* If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
*
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@ -532,27 +549,27 @@ public abstract class IDNA {
UCharacterIterator iter = UCharacterIterator.getInstance(src);
return convertToASCII(iter,options);
}
/**
* IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
* This operation is done on <b>single labels</b> before sending it to something that expects
* ASCII names. A label is an individual part of a domain name. Labels are usually
* separated by dots; e.g., "www.example.com" is composed of 3 labels
* separated by dots; e.g., "www.example.com" is composed of 3 labels
* "www","example", and "com".
*
* @param src The input string as UCharacterIterator to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points in the input
* If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
*
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@ -565,29 +582,29 @@ public abstract class IDNA {
/**
* IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
* This operation is done on complete domain names, e.g: "www.example.com".
* It is important to note that this operation can fail. If it fails, then the input
* This operation is done on complete domain names, e.g: "www.example.com".
* It is important to note that this operation can fail. If it fails, then the input
* domain name cannot be used as an Internationalized Domain Name and the application
* should have methods defined to deal with the failure.
*
*
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
* and then convert. This function does not offer that level of granularity. The options once
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
* and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param src The input string as UCharacterIterator to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points in the input
* If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
*
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@ -595,34 +612,34 @@ public abstract class IDNA {
@Deprecated
public static StringBuffer convertIDNToASCII(UCharacterIterator src, int options)
throws StringPrepParseException{
return convertIDNToASCII(src.getText(), options);
return convertIDNToASCII(src.getText(), options);
}
/**
* IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
* This operation is done on complete domain names, e.g: "www.example.com".
* It is important to note that this operation can fail. If it fails, then the input
* This operation is done on complete domain names, e.g: "www.example.com".
* It is important to note that this operation can fail. If it fails, then the input
* domain name cannot be used as an Internationalized Domain Name and the application
* should have methods defined to deal with the failure.
*
*
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
* and then convert. This function does not offer that level of granularity. The options once
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
* and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param src The input string as a StringBuffer to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points in the input
* If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
*
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@ -630,34 +647,34 @@ public abstract class IDNA {
@Deprecated
public static StringBuffer convertIDNToASCII(StringBuffer src, int options)
throws StringPrepParseException{
return convertIDNToASCII(src.toString(), options);
return convertIDNToASCII(src.toString(), options);
}
/**
* IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
* This operation is done on complete domain names, e.g: "www.example.com".
* It is important to note that this operation can fail. If it fails, then the input
* This operation is done on complete domain names, e.g: "www.example.com".
* It is important to note that this operation can fail. If it fails, then the input
* domain name cannot be used as an Internationalized Domain Name and the application
* should have methods defined to deal with the failure.
*
*
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
* and then convert. This function does not offer that level of granularity. The options once
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
* and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param src The input string to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points in the input
* If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
*
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@ -668,27 +685,27 @@ public abstract class IDNA {
return IDNA2003.convertIDNToASCII(src, options);
}
/**
* IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
* This operation is done on <b>single labels</b> before sending it to something that expects
* Unicode names. A label is an individual part of a domain name. Labels are usually
* separated by dots; e.g., "www.example.com" is composed of 3 labels
* separated by dots; e.g., "www.example.com" is composed of 3 labels
* "www","example", and "com".
*
*
* @param src The input string to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points in the input
* If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
*
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@ -699,27 +716,27 @@ public abstract class IDNA {
UCharacterIterator iter = UCharacterIterator.getInstance(src);
return convertToUnicode(iter,options);
}
/**
* IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
* This operation is done on <b>single labels</b> before sending it to something that expects
* Unicode names. A label is an individual part of a domain name. Labels are usually
* separated by dots; e.g., "www.example.com" is composed of 3 labels
* separated by dots; e.g., "www.example.com" is composed of 3 labels
* "www","example", and "com".
*
*
* @param src The input string as StringBuffer to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points in the input
* If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
*
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@ -730,27 +747,27 @@ public abstract class IDNA {
UCharacterIterator iter = UCharacterIterator.getInstance(src);
return convertToUnicode(iter,options);
}
/**
* IDNA2003: Function that implements the ToUnicode operation as defined in the IDNA RFC.
* This operation is done on <b>single labels</b> before sending it to something that expects
* Unicode names. A label is an individual part of a domain name. Labels are usually
* separated by dots; e.g., "www.example.com" is composed of 3 labels
* separated by dots; e.g., "www.example.com" is composed of 3 labels
* "www","example", and "com".
*
*
* @param src The input string as UCharacterIterator to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points in the input
* If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
*
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@ -760,29 +777,29 @@ public abstract class IDNA {
throws StringPrepParseException{
return IDNA2003.convertToUnicode(src, options);
}
/**
* IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
* This operation is done on complete domain names, e.g: "www.example.com".
* This operation is done on complete domain names, e.g: "www.example.com".
*
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
* and then convert. This function does not offer that level of granularity. The options once
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
* and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param src The input string as UCharacterIterator to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points in the input
* If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
*
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@ -792,29 +809,29 @@ public abstract class IDNA {
throws StringPrepParseException{
return convertIDNToUnicode(src.getText(), options);
}
/**
* IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
* This operation is done on complete domain names, e.g: "www.example.com".
* This operation is done on complete domain names, e.g: "www.example.com".
*
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
* and then convert. This function does not offer that level of granularity. The options once
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
* and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param src The input string as StringBuffer to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points in the input
* If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
*
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@ -824,29 +841,29 @@ public abstract class IDNA {
throws StringPrepParseException{
return convertIDNToUnicode(src.toString(), options);
}
/**
* IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
* This operation is done on complete domain names, e.g: "www.example.com".
* This operation is done on complete domain names, e.g: "www.example.com".
*
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
* and then convert. This function does not offer that level of granularity. The options once
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
* and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param src The input string to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points in the input
* If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
*
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@ -856,30 +873,30 @@ public abstract class IDNA {
throws StringPrepParseException{
return IDNA2003.convertIDNToUnicode(src, options);
}
/**
* IDNA2003: Compare two IDN strings for equivalence.
* This function splits the domain names into labels and compares them.
* According to IDN RFC, whenever two labels are compared, they are
* considered equal if and only if their ASCII forms (obtained by
* According to IDN RFC, whenever two labels are compared, they are
* considered equal if and only if their ASCII forms (obtained by
* applying toASCII) match using a case-insensitive ASCII comparison.
* Two domain names are considered a match if and only if all labels
* Two domain names are considered a match if and only if all labels
* match regardless of whether label separators match.
*
*
* @param s1 First IDN string as StringBuffer
* @param s2 Second IDN string as StringBuffer
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points in the input
* If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
*
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return 0 if the strings are equal, &gt; 0 if s1 &gt; s2 and &lt; 0 if s1 &lt; s2
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@ -892,30 +909,30 @@ public abstract class IDNA {
}
return IDNA2003.compare(s1.toString(), s2.toString(), options);
}
/**
* IDNA2003: Compare two IDN strings for equivalence.
* This function splits the domain names into labels and compares them.
* According to IDN RFC, whenever two labels are compared, they are
* considered equal if and only if their ASCII forms (obtained by
* According to IDN RFC, whenever two labels are compared, they are
* considered equal if and only if their ASCII forms (obtained by
* applying toASCII) match using a case-insensitive ASCII comparison.
* Two domain names are considered a match if and only if all labels
* Two domain names are considered a match if and only if all labels
* match regardless of whether label separators match.
*
* @param s1 First IDN string
*
* @param s1 First IDN string
* @param s2 Second IDN string
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points in the input
* If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
*
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return 0 if the strings are equal, &gt; 0 if s1 &gt; s2 and &lt; 0 if s1 &lt; s2
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@ -930,26 +947,26 @@ public abstract class IDNA {
/**
* IDNA2003: Compare two IDN strings for equivalence.
* This function splits the domain names into labels and compares them.
* According to IDN RFC, whenever two labels are compared, they are
* considered equal if and only if their ASCII forms (obtained by
* According to IDN RFC, whenever two labels are compared, they are
* considered equal if and only if their ASCII forms (obtained by
* applying toASCII) match using a case-insensitive ASCII comparison.
* Two domain names are considered a match if and only if all labels
* Two domain names are considered a match if and only if all labels
* match regardless of whether label separators match.
*
*
* @param s1 First IDN string as UCharacterIterator
* @param s2 Second IDN string as UCharacterIterator
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points in the input
* If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
*
*
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return 0 if the strings are equal, &gt; 0 if s1 &gt; s2 and &lt; 0 if s1 &lt; s2
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.

View file

@ -179,6 +179,24 @@ public class UTS46Test extends CoreTestFmwk {
info.getErrors().contains(IDNA.Error.INVALID_ACE_LABEL));
}
@Test
public void TestDefaultNontransitional() {
    // Transitional processing was deprecated by Unicode 15.1 UTS #46, and since ICU 76
    // IDNA.DEFAULT sets the nontransitional options. This test contrasts an explicit
    // options value of 0 with IDNA.DEFAULT on the same label.
    final String label = "Fⓤßẞ";
    final IDNA zeroOptions = IDNA.getUTS46Instance(0);
    final IDNA defaultOptions = IDNA.getUTS46Instance(IDNA.DEFAULT);
    final IDNA.Info info = new IDNA.Info();
    // One destination buffer is shared by all four calls; the expected values below
    // show that each call leaves only its own output in the buffer.
    final StringBuilder dest = new StringBuilder();

    // Options == 0: both sharp-s characters are expected to come out as "ss".
    zeroOptions.labelToUnicode(label, dest, info);
    assertEquals("forZero.toUnicode(Fⓤßẞ)", "fussss", dest.toString());
    zeroOptions.labelToASCII(label, dest, info);
    assertEquals("forZero.toASCII(Fⓤßẞ)", "fussss", dest.toString());

    // IDNA.DEFAULT: sharp s is expected to survive in the Unicode form,
    // and the ASCII form is therefore an "xn--" label.
    defaultOptions.labelToUnicode(label, dest, info);
    assertEquals("forDefault.toUnicode(Fⓤßẞ)", "fußß", dest.toString());
    defaultOptions.labelToASCII(label, dest, info);
    assertEquals("forDefault.toASCII(Fⓤßẞ)", "xn--fu-hiaa", dest.toString());
}
@Test
public void TestTooLong() {
// ICU-13727: Limit input length for n^2 algorithm