diff --git a/icu4c/source/common/unicode/idna.h b/icu4c/source/common/unicode/idna.h index 1c57205bae2..1e36fa771f0 100644 --- a/icu4c/source/common/unicode/idna.h +++ b/icu4c/source/common/unicode/idna.h @@ -70,6 +70,7 @@ public: * The worker functions use transitional processing, including deviation mappings, * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE * is used in which case the deviation characters are passed through without change. + * Unicode 15.1 UTS #46 deprecated transitional processing. * * Disallowed characters are mapped to U+FFFD. * @@ -82,6 +83,8 @@ public: * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. * * @param options Bit set to modify the processing and error checking. + * These should include UIDNA_DEFAULT, or + * UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE. * See option bit set values in uidna.h. * @param errorCode Standard ICU error code. Its input value must * pass the U_SUCCESS() test, or else the function returns diff --git a/icu4c/source/common/unicode/uidna.h b/icu4c/source/common/unicode/uidna.h index 24a81ceaddf..362a2dcbe65 100644 --- a/icu4c/source/common/unicode/uidna.h +++ b/icu4c/source/common/unicode/uidna.h @@ -49,11 +49,19 @@ */ enum { /** - * Default options value: None of the other options are set. + * Default options value: UTS #46 nontransitional processing. * For use in static worker and factory methods. + * + * Since ICU 76, this is the same as + * UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE, + * corresponding to Unicode 15.1 UTS #46 deprecating transitional processing. + * (These options are ignored by the IDNA2003 implementation.) + * + * Before ICU 76, this constant did not set any of the options. + * * @stable ICU 2.6 */ - UIDNA_DEFAULT=0, + UIDNA_DEFAULT=0x30, #ifndef U_HIDE_DEPRECATED_API /** * Option to allow unassigned code points in domain names and labels. 
@@ -91,19 +99,27 @@ enum { /** * IDNA option for nontransitional processing in ToASCII(). * For use in static worker and factory methods. + * *
By default, ToASCII() uses transitional processing. + * Unicode 15.1 UTS #46 deprecated transitional processing. + * *
This option is ignored by the IDNA2003 implementation. * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.) * @stable ICU 4.6 + * @see UIDNA_DEFAULT */ UIDNA_NONTRANSITIONAL_TO_ASCII=0x10, /** * IDNA option for nontransitional processing in ToUnicode(). * For use in static worker and factory methods. + * *
By default, ToUnicode() uses transitional processing. + * Unicode 15.1 UTS #46 deprecated transitional processing. + * *
This option is ignored by the IDNA2003 implementation.
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
* @stable ICU 4.6
+ * @see UIDNA_DEFAULT
*/
UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
/**
@@ -134,6 +150,8 @@ typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @stable ICU 4.6 */
* For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
*
* @param options Bit set to modify the processing and error checking.
+ * For the recommended nontransitional processing, include UIDNA_DEFAULT or, equivalently,
+ * UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE.
* See option bit set values in uidna.h.
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
diff --git a/icu4c/source/test/intltest/uts46test.cpp b/icu4c/source/test/intltest/uts46test.cpp
index 73ff225eb3c..8defd1c1935 100644
--- a/icu4c/source/test/intltest/uts46test.cpp
+++ b/icu4c/source/test/intltest/uts46test.cpp
@@ -42,6 +42,7 @@ public:
void TestNotSTD3();
void TestInvalidPunycodeDigits();
void TestACELabelEdgeCases();
+ void TestDefaultNontransitional();
void TestTooLong();
void TestSomeCases();
void IdnaTest();
@@ -88,6 +89,7 @@ void UTS46Test::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
TESTCASE_AUTO(TestNotSTD3);
TESTCASE_AUTO(TestInvalidPunycodeDigits);
TESTCASE_AUTO(TestACELabelEdgeCases);
+ TESTCASE_AUTO(TestDefaultNontransitional);
TESTCASE_AUTO(TestTooLong);
TESTCASE_AUTO(TestSomeCases);
TESTCASE_AUTO(IdnaTest);
@@ -354,6 +356,27 @@ void UTS46Test::TestACELabelEdgeCases() {
}
}
+void UTS46Test::TestDefaultNontransitional() {
+ IcuTestErrorCode errorCode(*this, "TestDefaultNontransitional()");
+ // Unicode 15.1 UTS #46 deprecated transitional processing.
+ // ICU 76 changed UIDNA_DEFAULT to set the nontransitional options.
+ LocalPointer
* The non-static methods implement UTS #46 and IDNA2008.
- * IDNA2008 is implemented according to UTS #46, see getUTS46Instance().
+ * IDNA2008 is implemented according to UTS #46, see {@link #getUTS46Instance(int)}.
*
* IDNA2003 is obsolete. The static methods implement IDNA2003. They are all deprecated.
*
@@ -32,35 +32,43 @@ import com.ibm.icu.impl.UTS46;
*
* The static IDNA API methods implement the IDNA protocol as defined in the
* IDNA RFC.
- * The draft defines 2 operations: ToASCII and ToUnicode. Domain labels
+ * The draft defines 2 operations: ToASCII and ToUnicode. Domain labels
* containing non-ASCII code points are required to be processed by
* ToASCII operation before passing it to resolver libraries. Domain names
* that are obtained from resolver libraries are required to be processed by
* ToUnicode operation before displaying the domain name to the user.
- * IDNA requires that implementations process input strings with
- * Nameprep,
- * which is a profile of Stringprep ,
- * and then with Punycode.
- * Implementations of IDNA MUST fully implement Nameprep and Punycode;
+ * IDNA requires that implementations process input strings with
+ * Nameprep,
+ * which is a profile of Stringprep ,
+ * and then with Punycode.
+ * Implementations of IDNA MUST fully implement Nameprep and Punycode;
* neither Nameprep nor Punycode are optional.
- * The input and output of ToASCII and ToUnicode operations are Unicode
+ * The input and output of ToASCII and ToUnicode operations are Unicode
* and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
* multiple times to an input string will yield the same result as applying the operation
* once.
- * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
+ * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
* ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string).
- *
+ *
* @author Ram Viswanadha, Markus Scherer
* @stable ICU 2.8
*/
public abstract class IDNA {
- /**
- * Default options value: None of the other options are set.
+ /**
+ * Default options value: UTS #46 nontransitional processing.
* For use in static worker and factory methods.
+ *
+ * Since ICU 76, this is the same as
+ * {@link #NONTRANSITIONAL_TO_ASCII} | {@link #NONTRANSITIONAL_TO_UNICODE},
+ * corresponding to Unicode 15.1 UTS #46 deprecating transitional processing.
+ * (These options are ignored by the IDNA2003 implementation.)
+ *
+ * Before ICU 76, this constant did not set any of the options.
+ *
* @stable ICU 2.8
*/
- public static final int DEFAULT = 0;
- /**
+ public static final int DEFAULT = 0x30;
+ /**
* Option to allow unassigned code points in domain names and labels.
* For use in static worker and factory methods.
* This option is ignored by the UTS46 implementation.
@@ -69,7 +77,7 @@ public abstract class IDNA {
*/
@Deprecated
public static final int ALLOW_UNASSIGNED = 1;
- /**
+ /**
* Option to check whether the input conforms to the STD3 ASCII rules,
* for example the restriction of labels to LDH characters
* (ASCII Letters, Digits and Hyphen-Minus).
@@ -96,7 +104,10 @@ public abstract class IDNA {
/**
* IDNA option for nontransitional processing in ToASCII().
* For use in static worker and factory methods.
+ *
* By default, ToASCII() uses transitional processing.
+ * Unicode 15.1 UTS #46 deprecated transitional processing; since ICU 76, {@link #DEFAULT} includes this option.
+ *
* This option is ignored by the IDNA2003 implementation.
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
* @stable ICU 4.6
@@ -105,7 +116,10 @@ public abstract class IDNA {
/**
* IDNA option for nontransitional processing in ToUnicode().
* For use in static worker and factory methods.
+ *
* By default, ToUnicode() uses transitional processing.
+ * Unicode 15.1 UTS #46 deprecated transitional processing; since ICU 76, {@link #DEFAULT} includes this option.
+ *
* This option is ignored by the IDNA2003 implementation.
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
* @stable ICU 4.6
@@ -133,8 +147,9 @@ public abstract class IDNA {
* IDNA2003 and IDNA2008.
*
* The worker functions use transitional processing, including deviation mappings,
- * unless NONTRANSITIONAL_TO_ASCII or NONTRANSITIONAL_TO_UNICODE
+ * unless {@link #NONTRANSITIONAL_TO_ASCII} or {@link #NONTRANSITIONAL_TO_UNICODE}
* is used in which case the deviation characters are passed through without change.
+ * Unicode 15.1 UTS #46 deprecated transitional processing.
*
* Disallowed characters are mapped to U+FFFD.
*
@@ -146,6 +161,8 @@ public abstract class IDNA {
* letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
*
* @param options Bit set to modify the processing and error checking.
+ * For the recommended nontransitional processing, include {@link IDNA#DEFAULT} or, equivalently,
+ * {@link IDNA#NONTRANSITIONAL_TO_ASCII} | {@link IDNA#NONTRANSITIONAL_TO_UNICODE}.
* @return the UTS #46 IDNA instance, if successful
* @stable ICU 4.6
*/
@@ -474,22 +491,22 @@ public abstract class IDNA {
* IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
* This operation is done on single labels before sending it to something that expects
* ASCII names. A label is an individual part of a domain name. Labels are usually
- * separated by dots; e.g." "www.example.com" is composed of 3 labels
+ * separated by dots; e.g., "www.example.com" is composed of 3 labels
* "www","example", and "com".
*
* @param src The input string to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
+ * If unassigned code points are found the operation fails with
* StringPrepParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
+ * If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
- *
+ *
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
+ * If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @throws StringPrepParseException When an error occurs for parsing a string.
@@ -501,27 +518,27 @@ public abstract class IDNA {
UCharacterIterator iter = UCharacterIterator.getInstance(src);
return convertToASCII(iter,options);
}
-
+
/**
* IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
* This operation is done on single labels before sending it to something that expects
* ASCII names. A label is an individual part of a domain name. Labels are usually
- * separated by dots; e.g." "www.example.com" is composed of 3 labels
+ * separated by dots; e.g., "www.example.com" is composed of 3 labels
* "www","example", and "com".
*
* @param src The input string as StringBuffer to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
+ * If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
+ * If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
- *
+ *
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
+ * If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@@ -532,27 +549,27 @@ public abstract class IDNA {
UCharacterIterator iter = UCharacterIterator.getInstance(src);
return convertToASCII(iter,options);
}
-
+
/**
* IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
* This operation is done on single labels before sending it to something that expects
* ASCII names. A label is an individual part of a domain name. Labels are usually
- * separated by dots; e.g." "www.example.com" is composed of 3 labels
+ * separated by dots; e.g., "www.example.com" is composed of 3 labels
* "www","example", and "com".
*
* @param src The input string as UCharacterIterator to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
+ * If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
+ * If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
- *
+ *
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
+ * If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@@ -565,29 +582,29 @@ public abstract class IDNA {
/**
* IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
- * This operation is done on complete domain names, e.g: "www.example.com".
- * It is important to note that this operation can fail. If it fails, then the input
+ * This operation is done on complete domain names, e.g: "www.example.com".
+ * It is important to note that this operation can fail. If it fails, then the input
* domain name cannot be used as an Internationalized Domain Name and the application
* should have methods defined to deal with the failure.
- *
+ *
* Note: IDNA RFC specifies that a conformant application should divide a domain name
- * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
- * and then convert. This function does not offer that level of granularity. The options once
+ * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
+ * and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param src The input string as UCharacterIterator to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
+ * If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
+ * If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
- *
+ *
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
+ * If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@@ -595,34 +612,34 @@ public abstract class IDNA {
@Deprecated
public static StringBuffer convertIDNToASCII(UCharacterIterator src, int options)
throws StringPrepParseException{
- return convertIDNToASCII(src.getText(), options);
+ return convertIDNToASCII(src.getText(), options);
}
-
+
/**
* IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
- * This operation is done on complete domain names, e.g: "www.example.com".
- * It is important to note that this operation can fail. If it fails, then the input
+ * This operation is done on complete domain names, e.g: "www.example.com".
+ * It is important to note that this operation can fail. If it fails, then the input
* domain name cannot be used as an Internationalized Domain Name and the application
* should have methods defined to deal with the failure.
- *
+ *
* Note: IDNA RFC specifies that a conformant application should divide a domain name
- * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
- * and then convert. This function does not offer that level of granularity. The options once
+ * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
+ * and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param src The input string as a StringBuffer to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
+ * If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
+ * If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
- *
+ *
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
+ * If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@@ -630,34 +647,34 @@ public abstract class IDNA {
@Deprecated
public static StringBuffer convertIDNToASCII(StringBuffer src, int options)
throws StringPrepParseException{
- return convertIDNToASCII(src.toString(), options);
+ return convertIDNToASCII(src.toString(), options);
}
-
+
/**
* IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
- * This operation is done on complete domain names, e.g: "www.example.com".
- * It is important to note that this operation can fail. If it fails, then the input
+ * This operation is done on complete domain names, e.g: "www.example.com".
+ * It is important to note that this operation can fail. If it fails, then the input
* domain name cannot be used as an Internationalized Domain Name and the application
* should have methods defined to deal with the failure.
- *
+ *
* Note: IDNA RFC specifies that a conformant application should divide a domain name
- * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
- * and then convert. This function does not offer that level of granularity. The options once
+ * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
+ * and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param src The input string to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
+ * If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
+ * If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
- *
+ *
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
+ * If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@@ -668,27 +685,27 @@ public abstract class IDNA {
return IDNA2003.convertIDNToASCII(src, options);
}
-
+
/**
* IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
* This operation is done on single labels before sending it to something that expects
* Unicode names. A label is an individual part of a domain name. Labels are usually
- * separated by dots; for e.g." "www.example.com" is composed of 3 labels
+ * separated by dots; e.g., "www.example.com" is composed of 3 labels
* "www","example", and "com".
- *
+ *
* @param src The input string to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
+ * If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
+ * If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
- *
+ *
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
+ * If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@@ -699,27 +716,27 @@ public abstract class IDNA {
UCharacterIterator iter = UCharacterIterator.getInstance(src);
return convertToUnicode(iter,options);
}
-
+
/**
* IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
* This operation is done on single labels before sending it to something that expects
* Unicode names. A label is an individual part of a domain name. Labels are usually
- * separated by dots; for e.g." "www.example.com" is composed of 3 labels
+ * separated by dots; e.g., "www.example.com" is composed of 3 labels
* "www","example", and "com".
- *
+ *
* @param src The input string as StringBuffer to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
+ * If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
+ * If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
- *
+ *
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
+ * If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@@ -730,27 +747,27 @@ public abstract class IDNA {
UCharacterIterator iter = UCharacterIterator.getInstance(src);
return convertToUnicode(iter,options);
}
-
+
/**
* IDNA2003: Function that implements the ToUnicode operation as defined in the IDNA RFC.
* This operation is done on single labels before sending it to something that expects
* Unicode names. A label is an individual part of a domain name. Labels are usually
- * separated by dots; for e.g." "www.example.com" is composed of 3 labels
+ * separated by dots; e.g., "www.example.com" is composed of 3 labels
* "www","example", and "com".
- *
+ *
* @param src The input string as UCharacterIterator to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
+ * If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
+ * If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
- *
+ *
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
+ * If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@@ -760,29 +777,29 @@ public abstract class IDNA {
throws StringPrepParseException{
return IDNA2003.convertToUnicode(src, options);
}
-
+
/**
* IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
- * This operation is done on complete domain names, e.g: "www.example.com".
+ * This operation is done on complete domain names, e.g: "www.example.com".
*
* Note: IDNA RFC specifies that a conformant application should divide a domain name
- * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
- * and then convert. This function does not offer that level of granularity. The options once
+ * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
+ * and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param src The input string as UCharacterIterator to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
+ * If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
+ * If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
- *
+ *
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
+ * If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@@ -792,29 +809,29 @@ public abstract class IDNA {
throws StringPrepParseException{
return convertIDNToUnicode(src.getText(), options);
}
-
+
/**
* IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
- * This operation is done on complete domain names, e.g: "www.example.com".
+ * This operation is done on complete domain names, e.g: "www.example.com".
*
* Note: IDNA RFC specifies that a conformant application should divide a domain name
- * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
- * and then convert. This function does not offer that level of granularity. The options once
+ * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
+ * and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param src The input string as StringBuffer to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
+ * If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
+ * If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
- *
+ *
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
+ * If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@@ -824,29 +841,29 @@ public abstract class IDNA {
throws StringPrepParseException{
return convertIDNToUnicode(src.toString(), options);
}
-
+
/**
* IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
- * This operation is done on complete domain names, e.g: "www.example.com".
+ * This operation is done on complete domain names, e.g: "www.example.com".
*
* Note: IDNA RFC specifies that a conformant application should divide a domain name
- * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
- * and then convert. This function does not offer that level of granularity. The options once
+ * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
+ * and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param src The input string to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
+ * If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
+ * If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
- *
+ *
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
+ * If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return StringBuffer the converted String
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@@ -856,30 +873,30 @@ public abstract class IDNA {
throws StringPrepParseException{
return IDNA2003.convertIDNToUnicode(src, options);
}
-
+
/**
* IDNA2003: Compare two IDN strings for equivalence.
* This function splits the domain names into labels and compares them.
- * According to IDN RFC, whenever two labels are compared, they are
- * considered equal if and only if their ASCII forms (obtained by
+ * According to IDN RFC, whenever two labels are compared, they are
+ * considered equal if and only if their ASCII forms (obtained by
* applying toASCII) match using an case-insensitive ASCII comparison.
- * Two domain names are considered a match if and only if all labels
+ * Two domain names are considered a match if and only if all labels
* match regardless of whether label separators match.
- *
+ *
* @param s1 First IDN string as StringBuffer
* @param s2 Second IDN string as StringBuffer
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
+ * If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
+ * If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
- *
+ *
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
+ * If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return 0 if the strings are equal, > 0 if s1 > s2 and < 0 if s1 < s2
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@@ -892,30 +909,30 @@ public abstract class IDNA {
}
return IDNA2003.compare(s1.toString(), s2.toString(), options);
}
-
+
/**
* IDNA2003: Compare two IDN strings for equivalence.
* This function splits the domain names into labels and compares them.
- * According to IDN RFC, whenever two labels are compared, they are
- * considered equal if and only if their ASCII forms (obtained by
+ * According to IDN RFC, whenever two labels are compared, they are
+ * considered equal if and only if their ASCII forms (obtained by
* applying toASCII) match using an case-insensitive ASCII comparison.
- * Two domain names are considered a match if and only if all labels
+ * Two domain names are considered a match if and only if all labels
* match regardless of whether label separators match.
- *
- * @param s1 First IDN string
+ *
+ * @param s1 First IDN string
* @param s2 Second IDN string
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
+ * If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
+ * If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
- *
+ *
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
+ * If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return 0 if the strings are equal, > 0 if s1 > s2 and < 0 if s1 < s2
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
@@ -930,26 +947,26 @@ public abstract class IDNA {
/**
* IDNA2003: Compare two IDN strings for equivalence.
* This function splits the domain names into labels and compares them.
- * According to IDN RFC, whenever two labels are compared, they are
- * considered equal if and only if their ASCII forms (obtained by
+ * According to IDN RFC, whenever two labels are compared, they are
+ * considered equal if and only if their ASCII forms (obtained by
* applying toASCII) match using an case-insensitive ASCII comparison.
- * Two domain names are considered a match if and only if all labels
+ * Two domain names are considered a match if and only if all labels
* match regardless of whether label separators match.
- *
+ *
* @param s1 First IDN string as UCharacterIterator
* @param s2 Second IDN string as UCharacterIterator
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
+ * If unassigned code points are found the operation fails with
* ParseException.
*
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
+ * If this option is set, the unassigned code points in the input
* are treated as normal Unicode code points.
- *
+ *
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
+ * If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with ParseException
* @return 0 if the strings are equal, > 0 if i1 > i2 and < 0 if i1 < i2
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
diff --git a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/normalizer/UTS46Test.java b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/normalizer/UTS46Test.java
index db56e877697..8824e02df8f 100644
--- a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/normalizer/UTS46Test.java
+++ b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/normalizer/UTS46Test.java
@@ -179,6 +179,24 @@ public class UTS46Test extends CoreTestFmwk {
info.getErrors().contains(IDNA.Error.INVALID_ACE_LABEL));
}
+ @Test
+ public void TestDefaultNontransitional() {
+ // Unicode 15.1 UTS #46 deprecated transitional processing.
+ // ICU 76 changed IDNA.DEFAULT to set the nontransitional options.
+ IDNA forZero = IDNA.getUTS46Instance(0); // literal 0: no option bits set
+ IDNA forDefault = IDNA.getUTS46Instance(IDNA.DEFAULT); // DEFAULT: nontransitional since ICU 76
+ StringBuilder result = new StringBuilder(); // reused for all four calls; each assert expects only the latest output
+ IDNA.Info info = new IDNA.Info(); // NOTE(review): filled by each call but never inspected for errors here
+ forZero.labelToUnicode("Fⓤßẞ", result, info);
+ assertEquals("forZero.toUnicode(Fⓤßẞ)", "fussss", result.toString()); // ß and ẞ each expected as "ss"
+ forZero.labelToASCII("Fⓤßẞ", result, info);
+ assertEquals("forZero.toASCII(Fⓤßẞ)", "fussss", result.toString()); // already all ASCII, same as toUnicode
+ forDefault.labelToUnicode("Fⓤßẞ", result, info);
+ assertEquals("forDefault.toUnicode(Fⓤßẞ)", "fußß", result.toString()); // ß kept; ẞ expected as ß
+ forDefault.labelToASCII("Fⓤßẞ", result, info);
+ assertEquals("forDefault.toASCII(Fⓤßẞ)", "xn--fu-hiaa", result.toString()); // non-ASCII ß yields an xn-- label
+ }
+
@Test
public void TestTooLong() {
// ICU-13727: Limit input length for n^2 algorithm