mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 05:25:34 +00:00
ICU-22294 UTS46 transitional=deprecated, change DEFAULT
This commit is contained in:
parent
415a7accc5
commit
f062f52c12
5 changed files with 217 additions and 138 deletions
|
@ -70,6 +70,7 @@ public:
|
|||
* The worker functions use transitional processing, including deviation mappings,
|
||||
* unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
|
||||
* is used in which case the deviation characters are passed through without change.
|
||||
* <b>Unicode 15.1 UTS #46 deprecated transitional processing.</b>
|
||||
*
|
||||
* Disallowed characters are mapped to U+FFFD.
|
||||
*
|
||||
|
@ -82,6 +83,8 @@ public:
|
|||
* letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
|
||||
*
|
||||
* @param options Bit set to modify the processing and error checking.
|
||||
* These should include UIDNA_DEFAULT, or
|
||||
* UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE.
|
||||
* See option bit set values in uidna.h.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
|
|
|
@ -49,11 +49,19 @@
|
|||
*/
|
||||
enum {
|
||||
/**
|
||||
* Default options value: None of the other options are set.
|
||||
* Default options value: UTS #46 nontransitional processing.
|
||||
* For use in static worker and factory methods.
|
||||
*
|
||||
* Since ICU 76, this is the same as
|
||||
* UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE,
|
||||
* corresponding to Unicode 15.1 UTS #46 deprecating transitional processing.
|
||||
* (These options are ignored by the IDNA2003 implementation.)
|
||||
*
|
||||
* Before ICU 76, this constant did not set any of the options.
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
UIDNA_DEFAULT=0,
|
||||
UIDNA_DEFAULT=0x30,
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* Option to allow unassigned code points in domain names and labels.
|
||||
|
@ -91,19 +99,27 @@ enum {
|
|||
/**
|
||||
* IDNA option for nontransitional processing in ToASCII().
|
||||
* For use in static worker and factory methods.
|
||||
*
|
||||
* <p>By default, ToASCII() uses transitional processing.
|
||||
* Unicode 15.1 UTS #46 deprecated transitional processing.
|
||||
*
|
||||
* <p>This option is ignored by the IDNA2003 implementation.
|
||||
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
|
||||
* @stable ICU 4.6
|
||||
* @see UIDNA_DEFAULT
|
||||
*/
|
||||
UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
|
||||
/**
|
||||
* IDNA option for nontransitional processing in ToUnicode().
|
||||
* For use in static worker and factory methods.
|
||||
*
|
||||
* <p>By default, ToUnicode() uses transitional processing.
|
||||
* Unicode 15.1 UTS #46 deprecated transitional processing.
|
||||
*
|
||||
* <p>This option is ignored by the IDNA2003 implementation.
|
||||
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
|
||||
* @stable ICU 4.6
|
||||
* @see UIDNA_DEFAULT
|
||||
*/
|
||||
UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
|
||||
/**
|
||||
|
@ -134,6 +150,8 @@ typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @stable ICU 4.6 */
|
|||
* For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
|
||||
*
|
||||
* @param options Bit set to modify the processing and error checking.
|
||||
* These should include UIDNA_DEFAULT, or
|
||||
* UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE.
|
||||
* See option bit set values in uidna.h.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
|
|
|
@ -42,6 +42,7 @@ public:
|
|||
void TestNotSTD3();
|
||||
void TestInvalidPunycodeDigits();
|
||||
void TestACELabelEdgeCases();
|
||||
void TestDefaultNontransitional();
|
||||
void TestTooLong();
|
||||
void TestSomeCases();
|
||||
void IdnaTest();
|
||||
|
@ -88,6 +89,7 @@ void UTS46Test::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
|
|||
TESTCASE_AUTO(TestNotSTD3);
|
||||
TESTCASE_AUTO(TestInvalidPunycodeDigits);
|
||||
TESTCASE_AUTO(TestACELabelEdgeCases);
|
||||
TESTCASE_AUTO(TestDefaultNontransitional);
|
||||
TESTCASE_AUTO(TestTooLong);
|
||||
TESTCASE_AUTO(TestSomeCases);
|
||||
TESTCASE_AUTO(IdnaTest);
|
||||
|
@ -354,6 +356,27 @@ void UTS46Test::TestACELabelEdgeCases() {
|
|||
}
|
||||
}
|
||||
|
||||
// Checks that an options value of 0 and UIDNA_DEFAULT select different
// UTS #46 processing for the same label: with 0 the test expects the
// sharp s characters to be mapped to "ss", with UIDNA_DEFAULT it expects
// them to be kept as U+00DF.
void UTS46Test::TestDefaultNontransitional() {
|
||||
IcuTestErrorCode errorCode(*this, "TestDefaultNontransitional()");
|
||||
// Unicode 15.1 UTS #46 deprecated transitional processing.
|
||||
// ICU 76 changed UIDNA_DEFAULT to set the nontransitional options.
|
||||
// forZero: explicit options value 0, i.e. none of the option bits set.
LocalPointer<IDNA> forZero(IDNA::createUTS46Instance(0, errorCode));
|
||||
// forDefault: whatever UIDNA_DEFAULT currently stands for.
LocalPointer<IDNA> forDefault(IDNA::createUTS46Instance(UIDNA_DEFAULT, errorCode));
|
||||
// If either instance could not be created there is nothing to compare.
if(errorCode.isFailure()) {
|
||||
return;
|
||||
}
|
||||
UnicodeString result;
|
||||
IDNAInfo info;
|
||||
// Options 0: both ToUnicode and ToASCII are expected to yield "fussss".
forZero->labelToUnicode(u"Fⓤßẞ", result, info, errorCode);
|
||||
assertEquals("forZero.toUnicode(Fⓤßẞ)", u"fussss", result);
|
||||
forZero->labelToASCII(u"Fⓤßẞ", result, info, errorCode);
|
||||
assertEquals("forZero.toASCII(Fⓤßẞ)", u"fussss", result);
|
||||
// UIDNA_DEFAULT: ToUnicode is expected to keep the sharp s ("fußß"),
// and ToASCII is expected to produce the corresponding "xn--" label.
forDefault->labelToUnicode(u"Fⓤßẞ", result, info, errorCode);
|
||||
assertEquals("forDefault.toUnicode(Fⓤßẞ)", u"fußß", result);
|
||||
forDefault->labelToASCII(u"Fⓤßẞ", result, info, errorCode);
|
||||
assertEquals("forDefault.toASCII(Fⓤßẞ)", u"xn--fu-hiaa", result);
|
||||
}
|
||||
|
||||
void UTS46Test::TestTooLong() {
|
||||
// ICU-13727: Limit input length for n^2 algorithm
|
||||
// where well-formed strings are at most 59 characters long.
|
||||
|
|
|
@ -24,7 +24,7 @@ import com.ibm.icu.impl.UTS46;
|
|||
* The IDNA class is not intended for public subclassing.
|
||||
* <p>
|
||||
* The non-static methods implement UTS #46 and IDNA2008.
|
||||
* IDNA2008 is implemented according to UTS #46, see getUTS46Instance().
|
||||
* IDNA2008 is implemented according to UTS #46, see {@link #getUTS46Instance(int)}.
|
||||
* <p>
|
||||
* IDNA2003 is obsolete. The static methods implement IDNA2003. They are all deprecated.
|
||||
* <p>
|
||||
|
@ -32,35 +32,43 @@ import com.ibm.icu.impl.UTS46;
|
|||
* <p>
|
||||
* The static IDNA API methods implement the IDNA protocol as defined in the
|
||||
* <a href="http://www.ietf.org/rfc/rfc3490.txt">IDNA RFC</a>.
|
||||
* The draft defines 2 operations: ToASCII and ToUnicode. Domain labels
|
||||
* The draft defines 2 operations: ToASCII and ToUnicode. Domain labels
|
||||
* containing non-ASCII code points are required to be processed by
|
||||
* ToASCII operation before passing it to resolver libraries. Domain names
|
||||
* that are obtained from resolver libraries are required to be processed by
|
||||
* ToUnicode operation before displaying the domain name to the user.
|
||||
* IDNA requires that implementations process input strings with
|
||||
* <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a>,
|
||||
* which is a profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a> ,
|
||||
* and then with <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a>.
|
||||
* Implementations of IDNA MUST fully implement Nameprep and Punycode;
|
||||
* IDNA requires that implementations process input strings with
|
||||
* <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a>,
|
||||
* which is a profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a> ,
|
||||
* and then with <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a>.
|
||||
* Implementations of IDNA MUST fully implement Nameprep and Punycode;
|
||||
* neither Nameprep nor Punycode are optional.
|
||||
* The input and output of ToASCII and ToUnicode operations are Unicode
|
||||
* The input and output of ToASCII and ToUnicode operations are Unicode
|
||||
* and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
|
||||
* multiple times to an input string will yield the same result as applying the operation
|
||||
* once.
|
||||
* ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
|
||||
* ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
|
||||
* ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
|
||||
*
|
||||
*
|
||||
* @author Ram Viswanadha, Markus Scherer
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
public abstract class IDNA {
|
||||
/**
|
||||
* Default options value: None of the other options are set.
|
||||
/**
|
||||
* Default options value: UTS #46 nontransitional processing.
|
||||
* For use in static worker and factory methods.
|
||||
*
|
||||
* <p>Since ICU 76, this is the same as
|
||||
* {@link #NONTRANSITIONAL_TO_ASCII} | {@link #NONTRANSITIONAL_TO_UNICODE},
|
||||
* corresponding to Unicode 15.1 UTS #46 deprecating transitional processing.
|
||||
* (These options are ignored by the IDNA2003 implementation.)
|
||||
*
|
||||
* <p>Before ICU 76, this constant did not set any of the options.
|
||||
*
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
public static final int DEFAULT = 0;
|
||||
/**
|
||||
public static final int DEFAULT = 0x30;
|
||||
/**
|
||||
* Option to allow unassigned code points in domain names and labels.
|
||||
* For use in static worker and factory methods.
|
||||
* <p>This option is ignored by the UTS46 implementation.
|
||||
|
@ -69,7 +77,7 @@ public abstract class IDNA {
|
|||
*/
|
||||
@Deprecated
|
||||
public static final int ALLOW_UNASSIGNED = 1;
|
||||
/**
|
||||
/**
|
||||
* Option to check whether the input conforms to the STD3 ASCII rules,
|
||||
* for example the restriction of labels to LDH characters
|
||||
* (ASCII Letters, Digits and Hyphen-Minus).
|
||||
|
@ -96,7 +104,10 @@ public abstract class IDNA {
|
|||
/**
|
||||
* IDNA option for nontransitional processing in ToASCII().
|
||||
* For use in static worker and factory methods.
|
||||
*
|
||||
* <p>By default, ToASCII() uses transitional processing.
|
||||
* Unicode 15.1 UTS #46 deprecated transitional processing.
|
||||
*
|
||||
* <p>This option is ignored by the IDNA2003 implementation.
|
||||
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
|
||||
* @stable ICU 4.6
|
||||
|
@ -105,7 +116,10 @@ public abstract class IDNA {
|
|||
/**
|
||||
* IDNA option for nontransitional processing in ToUnicode().
|
||||
* For use in static worker and factory methods.
|
||||
*
|
||||
* <p>By default, ToUnicode() uses transitional processing.
|
||||
* Unicode 15.1 UTS #46 deprecated transitional processing.
|
||||
*
|
||||
* <p>This option is ignored by the IDNA2003 implementation.
|
||||
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
|
||||
* @stable ICU 4.6
|
||||
|
@ -133,8 +147,9 @@ public abstract class IDNA {
|
|||
* IDNA2003 and IDNA2008.
|
||||
* <p>
|
||||
* The worker functions use transitional processing, including deviation mappings,
|
||||
* unless NONTRANSITIONAL_TO_ASCII or NONTRANSITIONAL_TO_UNICODE
|
||||
* unless {@link #NONTRANSITIONAL_TO_ASCII} or {@link #NONTRANSITIONAL_TO_UNICODE}
|
||||
* is used in which case the deviation characters are passed through without change.
|
||||
* <b>Unicode 15.1 UTS #46 deprecated transitional processing.</b>
|
||||
* <p>
|
||||
* Disallowed characters are mapped to U+FFFD.
|
||||
* <p>
|
||||
|
@ -146,6 +161,8 @@ public abstract class IDNA {
|
|||
* letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
|
||||
*
|
||||
* @param options Bit set to modify the processing and error checking.
|
||||
* These should include {@link IDNA#DEFAULT}, or
|
||||
* {@link IDNA#NONTRANSITIONAL_TO_ASCII} | {@link IDNA#NONTRANSITIONAL_TO_UNICODE}.
|
||||
* @return the UTS #46 IDNA instance, if successful
|
||||
* @stable ICU 4.6
|
||||
*/
|
||||
|
@ -474,22 +491,22 @@ public abstract class IDNA {
|
|||
* IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
|
||||
* This operation is done on <b>single labels</b> before sending it to something that expects
|
||||
* ASCII names. A label is an individual part of a domain name. Labels are usually
|
||||
* separated by dots; e.g., "www.example.com" is composed of 3 labels
|
||||
* separated by dots; e.g., "www.example.com" is composed of 3 labels
|
||||
* "www","example", and "com".
|
||||
*
|
||||
* @param src The input string to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* If unassigned code points are found the operation fails with
|
||||
* StringPrepParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @throws StringPrepParseException When an error occurs for parsing a string.
|
||||
|
@ -501,27 +518,27 @@ public abstract class IDNA {
|
|||
UCharacterIterator iter = UCharacterIterator.getInstance(src);
|
||||
return convertToASCII(iter,options);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
|
||||
* This operation is done on <b>single labels</b> before sending it to something that expects
|
||||
* ASCII names. A label is an individual part of a domain name. Labels are usually
|
||||
* separated by dots; e.g., "www.example.com" is composed of 3 labels
|
||||
* separated by dots; e.g., "www.example.com" is composed of 3 labels
|
||||
* "www","example", and "com".
|
||||
*
|
||||
* @param src The input string as StringBuffer to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
|
||||
|
@ -532,27 +549,27 @@ public abstract class IDNA {
|
|||
UCharacterIterator iter = UCharacterIterator.getInstance(src);
|
||||
return convertToASCII(iter,options);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
|
||||
* This operation is done on <b>single labels</b> before sending it to something that expects
|
||||
* ASCII names. A label is an individual part of a domain name. Labels are usually
|
||||
* separated by dots; e.g., "www.example.com" is composed of 3 labels
|
||||
* separated by dots; e.g., "www.example.com" is composed of 3 labels
|
||||
* "www","example", and "com".
|
||||
*
|
||||
* @param src The input string as UCharacterIterator to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
|
||||
|
@ -565,29 +582,29 @@ public abstract class IDNA {
|
|||
|
||||
/**
|
||||
* IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
* It is important to note that this operation can fail. If it fails, then the input
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
* It is important to note that this operation can fail. If it fails, then the input
|
||||
* domain name cannot be used as an Internationalized Domain Name and the application
|
||||
* should have methods defined to deal with the failure.
|
||||
*
|
||||
*
|
||||
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* set will apply to all labels in the domain name
|
||||
*
|
||||
* @param src The input string as UCharacterIterator to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
|
||||
|
@ -595,34 +612,34 @@ public abstract class IDNA {
|
|||
@Deprecated
|
||||
public static StringBuffer convertIDNToASCII(UCharacterIterator src, int options)
|
||||
throws StringPrepParseException{
|
||||
return convertIDNToASCII(src.getText(), options);
|
||||
return convertIDNToASCII(src.getText(), options);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
* It is important to note that this operation can fail. If it fails, then the input
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
* It is important to note that this operation can fail. If it fails, then the input
|
||||
* domain name cannot be used as an Internationalized Domain Name and the application
|
||||
* should have methods defined to deal with the failure.
|
||||
*
|
||||
*
|
||||
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* set will apply to all labels in the domain name
|
||||
*
|
||||
* @param src The input string as a StringBuffer to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
|
||||
|
@ -630,34 +647,34 @@ public abstract class IDNA {
|
|||
@Deprecated
|
||||
public static StringBuffer convertIDNToASCII(StringBuffer src, int options)
|
||||
throws StringPrepParseException{
|
||||
return convertIDNToASCII(src.toString(), options);
|
||||
return convertIDNToASCII(src.toString(), options);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
* It is important to note that this operation can fail. If it fails, then the input
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
* It is important to note that this operation can fail. If it fails, then the input
|
||||
* domain name cannot be used as an Internationalized Domain Name and the application
|
||||
* should have methods defined to deal with the failure.
|
||||
*
|
||||
*
|
||||
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* set will apply to all labels in the domain name
|
||||
*
|
||||
* @param src The input string to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
|
||||
|
@ -668,27 +685,27 @@ public abstract class IDNA {
|
|||
return IDNA2003.convertIDNToASCII(src, options);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
|
||||
* This operation is done on <b>single labels</b> before sending it to something that expects
|
||||
* Unicode names. A label is an individual part of a domain name. Labels are usually
|
||||
* separated by dots; e.g., "www.example.com" is composed of 3 labels
|
||||
* separated by dots; e.g., "www.example.com" is composed of 3 labels
|
||||
* "www","example", and "com".
|
||||
*
|
||||
*
|
||||
* @param src The input string to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
|
||||
|
@ -699,27 +716,27 @@ public abstract class IDNA {
|
|||
UCharacterIterator iter = UCharacterIterator.getInstance(src);
|
||||
return convertToUnicode(iter,options);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
|
||||
* This operation is done on <b>single labels</b> before sending it to something that expects
|
||||
* Unicode names. A label is an individual part of a domain name. Labels are usually
|
||||
* separated by dots; e.g., "www.example.com" is composed of 3 labels
|
||||
* separated by dots; e.g., "www.example.com" is composed of 3 labels
|
||||
* "www","example", and "com".
|
||||
*
|
||||
*
|
||||
* @param src The input string as StringBuffer to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
|
||||
|
@ -730,27 +747,27 @@ public abstract class IDNA {
|
|||
UCharacterIterator iter = UCharacterIterator.getInstance(src);
|
||||
return convertToUnicode(iter,options);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* IDNA2003: Function that implements the ToUnicode operation as defined in the IDNA RFC.
|
||||
* This operation is done on <b>single labels</b> before sending it to something that expects
|
||||
* Unicode names. A label is an individual part of a domain name. Labels are usually
|
||||
* separated by dots; e.g., "www.example.com" is composed of 3 labels
|
||||
* separated by dots; e.g., "www.example.com" is composed of 3 labels
|
||||
* "www","example", and "com".
|
||||
*
|
||||
*
|
||||
* @param src The input string as UCharacterIterator to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
|
||||
|
@ -760,29 +777,29 @@ public abstract class IDNA {
|
|||
throws StringPrepParseException{
|
||||
return IDNA2003.convertToUnicode(src, options);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
*
|
||||
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* set will apply to all labels in the domain name
|
||||
*
|
||||
* @param src The input string as UCharacterIterator to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
|
||||
|
@ -792,29 +809,29 @@ public abstract class IDNA {
|
|||
throws StringPrepParseException{
|
||||
return convertIDNToUnicode(src.getText(), options);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
*
|
||||
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* set will apply to all labels in the domain name
|
||||
*
|
||||
* @param src The input string as StringBuffer to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
|
||||
|
@ -824,29 +841,29 @@ public abstract class IDNA {
|
|||
throws StringPrepParseException{
|
||||
return convertIDNToUnicode(src.toString(), options);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
*
|
||||
* <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* set will apply to all labels in the domain name
|
||||
*
|
||||
* @param src The input string to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return StringBuffer the converted String
|
||||
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
|
||||
|
@ -856,30 +873,30 @@ public abstract class IDNA {
|
|||
throws StringPrepParseException{
|
||||
return IDNA2003.convertIDNToUnicode(src, options);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* IDNA2003: Compare two IDN strings for equivalence.
|
||||
* This function splits the domain names into labels and compares them.
|
||||
* According to IDN RFC, whenever two labels are compared, they are
|
||||
* considered equal if and only if their ASCII forms (obtained by
|
||||
* According to IDN RFC, whenever two labels are compared, they are
|
||||
* considered equal if and only if their ASCII forms (obtained by
|
||||
* applying toASCII) match using a case-insensitive ASCII comparison.
|
||||
* Two domain names are considered a match if and only if all labels
|
||||
* Two domain names are considered a match if and only if all labels
|
||||
* match regardless of whether label separators match.
|
||||
*
|
||||
*
|
||||
* @param s1 First IDN string as StringBuffer
|
||||
* @param s2 Second IDN string as StringBuffer
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return 0 if the strings are equal, > 0 if s1 > s2 and < 0 if s1 < s2
|
||||
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
|
||||
|
@ -892,30 +909,30 @@ public abstract class IDNA {
|
|||
}
|
||||
return IDNA2003.compare(s1.toString(), s2.toString(), options);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* IDNA2003: Compare two IDN strings for equivalence.
|
||||
* This function splits the domain names into labels and compares them.
|
||||
* According to IDN RFC, whenever two labels are compared, they are
|
||||
* considered equal if and only if their ASCII forms (obtained by
|
||||
* According to IDN RFC, whenever two labels are compared, they are
|
||||
* considered equal if and only if their ASCII forms (obtained by
|
||||
* applying toASCII) match using a case-insensitive ASCII comparison.
|
||||
* Two domain names are considered a match if and only if all labels
|
||||
* Two domain names are considered a match if and only if all labels
|
||||
* match regardless of whether label separators match.
|
||||
*
|
||||
* @param s1 First IDN string
|
||||
*
|
||||
* @param s1 First IDN string
|
||||
* @param s2 Second IDN string
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return 0 if the strings are equal, > 0 if s1 > s2 and < 0 if s1 < s2
|
||||
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
|
||||
|
@ -930,26 +947,26 @@ public abstract class IDNA {
|
|||
/**
|
||||
* IDNA2003: Compare two IDN strings for equivalence.
|
||||
* This function splits the domain names into labels and compares them.
|
||||
* According to IDN RFC, whenever two labels are compared, they are
|
||||
* considered equal if and only if their ASCII forms (obtained by
|
||||
* According to IDN RFC, whenever two labels are compared, they are
|
||||
* considered equal if and only if their ASCII forms (obtained by
|
||||
* applying toASCII) match using a case-insensitive ASCII comparison.
|
||||
* Two domain names are considered a match if and only if all labels
|
||||
* Two domain names are considered a match if and only if all labels
|
||||
* match regardless of whether label separators match.
|
||||
*
|
||||
*
|
||||
* @param s1 First IDN string as UCharacterIterator
|
||||
* @param s2 Second IDN string as UCharacterIterator
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* If unassigned code points are found the operation fails with
|
||||
* ParseException.
|
||||
*
|
||||
* - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
*
|
||||
* - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with ParseException
|
||||
* @return 0 if the strings are equal, > 0 if s1 > s2 and < 0 if s1 < s2
|
||||
* @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
|
||||
|
|
|
@ -179,6 +179,24 @@ public class UTS46Test extends CoreTestFmwk {
|
|||
info.getErrors().contains(IDNA.Error.INVALID_ACE_LABEL));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestDefaultNontransitional() {
|
||||
// Unicode 15.1 UTS #46 deprecated transitional processing.
|
||||
// ICU 76 changed IDNA.DEFAULT to set the nontransitional options.
|
||||
IDNA forZero = IDNA.getUTS46Instance(0);
|
||||
IDNA forDefault = IDNA.getUTS46Instance(IDNA.DEFAULT);
|
||||
StringBuilder result = new StringBuilder();
|
||||
IDNA.Info info = new IDNA.Info();
|
||||
forZero.labelToUnicode("Fⓤßẞ", result, info);
|
||||
assertEquals("forZero.toUnicode(Fⓤßẞ)", "fussss", result.toString());
|
||||
forZero.labelToASCII("Fⓤßẞ", result, info);
|
||||
assertEquals("forZero.toASCII(Fⓤßẞ)", "fussss", result.toString());
|
||||
forDefault.labelToUnicode("Fⓤßẞ", result, info);
|
||||
assertEquals("forDefault.toUnicode(Fⓤßẞ)", "fußß", result.toString());
|
||||
forDefault.labelToASCII("Fⓤßẞ", result, info);
|
||||
assertEquals("forDefault.toASCII(Fⓤßẞ)", "xn--fu-hiaa", result.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestTooLong() {
|
||||
// ICU-13727: Limit input length for n^2 algorithm
|
||||
|
|
Loading…
Add table
Reference in a new issue