mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 13:35:32 +00:00
ICU-21030 validate ACE label edge cases
This commit is contained in:
parent
e19d12997b
commit
9279e49d2f
4 changed files with 84 additions and 11 deletions
|
@ -714,6 +714,16 @@ UTS46::processLabel(UnicodeString &dest,
|
|||
UBool wasPunycode;
|
||||
if(labelLength>=4 && label[0]==0x78 && label[1]==0x6e && label[2]==0x2d && label[3]==0x2d) {
|
||||
// Label starts with "xn--", try to un-Punycode it.
|
||||
// In IDNA2008, labels like "xn--" (decodes to an empty string) and
|
||||
// "xn--ASCII-" (decodes to just "ASCII") fail the round-trip validation from
|
||||
// comparing the ToUnicode input with the back-to-ToASCII output.
|
||||
// They are alternate encodings of the respective ASCII labels.
|
||||
// Ignore "xn---" here: It will fail Punycode.decode() which logically comes before
|
||||
// the round-trip verification.
|
||||
if(labelLength==4 || (labelLength>5 && label[labelLength-1]==u'-')) {
|
||||
info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL;
|
||||
return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode);
|
||||
}
|
||||
wasPunycode=TRUE;
|
||||
UChar *unicodeBuffer=fromPunycode.getBuffer(-1); // capacity==-1: most labels should fit
|
||||
if(unicodeBuffer==NULL) {
|
||||
|
@ -925,10 +935,10 @@ UTS46::markBadACELabel(UnicodeString &dest,
|
|||
UBool isASCII=TRUE;
|
||||
UBool onlyLDH=TRUE;
|
||||
const UChar *label=dest.getBuffer()+labelStart;
|
||||
// Ok to cast away const because we own the UnicodeString.
|
||||
UChar *s=(UChar *)label+4; // After the initial "xn--".
|
||||
const UChar *limit=label+labelLength;
|
||||
do {
|
||||
// Start after the initial "xn--".
|
||||
// Ok to cast away const because we own the UnicodeString.
|
||||
for(UChar *s=const_cast<UChar *>(label+4); s<limit; ++s) {
|
||||
UChar c=*s;
|
||||
if(c<=0x7f) {
|
||||
if(c==0x2e) {
|
||||
|
@ -945,7 +955,7 @@ UTS46::markBadACELabel(UnicodeString &dest,
|
|||
} else {
|
||||
isASCII=onlyLDH=FALSE;
|
||||
}
|
||||
} while(++s<limit);
|
||||
}
|
||||
if(onlyLDH) {
|
||||
dest.insert(labelStart+labelLength, (UChar)0xfffd);
|
||||
if(dest.isBogus()) {
|
||||
|
|
|
@ -40,6 +40,7 @@ public:
|
|||
void TestAPI();
|
||||
void TestNotSTD3();
|
||||
void TestInvalidPunycodeDigits();
|
||||
void TestACELabelEdgeCases();
|
||||
void TestSomeCases();
|
||||
void IdnaTest();
|
||||
|
||||
|
@ -84,6 +85,7 @@ void UTS46Test::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
|
|||
TESTCASE_AUTO(TestAPI);
|
||||
TESTCASE_AUTO(TestNotSTD3);
|
||||
TESTCASE_AUTO(TestInvalidPunycodeDigits);
|
||||
TESTCASE_AUTO(TestACELabelEdgeCases);
|
||||
TESTCASE_AUTO(TestSomeCases);
|
||||
TESTCASE_AUTO(IdnaTest);
|
||||
TESTCASE_AUTO_END;
|
||||
|
@ -312,6 +314,35 @@ void UTS46Test::TestInvalidPunycodeDigits() {
|
|||
}
|
||||
}
|
||||
|
||||
void UTS46Test::TestACELabelEdgeCases() {
|
||||
// In IDNA2008, these labels fail the round-trip validation from comparing
|
||||
// the ToUnicode input with the back-to-ToASCII output.
|
||||
IcuTestErrorCode errorCode(*this, "TestACELabelEdgeCases()");
|
||||
LocalPointer<IDNA> idna(IDNA::createUTS46Instance(0, errorCode));
|
||||
if(errorCode.isFailure()) {
|
||||
return;
|
||||
}
|
||||
UnicodeString result;
|
||||
{
|
||||
IDNAInfo info;
|
||||
idna->labelToUnicode(u"xn--", result, info, errorCode);
|
||||
assertTrue("empty xn--", (info.getErrors()&UIDNA_ERROR_INVALID_ACE_LABEL)!=0);
|
||||
}
|
||||
{
|
||||
IDNAInfo info;
|
||||
idna->labelToUnicode(u"xN--ASCII-", result, info, errorCode);
|
||||
assertTrue("nothing but ASCII", (info.getErrors()&UIDNA_ERROR_INVALID_ACE_LABEL)!=0);
|
||||
}
|
||||
{
|
||||
// Different error: The Punycode decoding procedure does not consume the last delimiter
|
||||
// if it is right after the xn-- so the main decoding loop fails because the hyphen
|
||||
// is not a valid Punycode digit.
|
||||
IDNAInfo info;
|
||||
idna->labelToUnicode(u"Xn---", result, info, errorCode);
|
||||
assertTrue("empty Xn---", (info.getErrors()&UIDNA_ERROR_PUNYCODE)!=0);
|
||||
}
|
||||
}
|
||||
|
||||
struct TestCase {
|
||||
// Input string and options string (Nontransitional/Transitional/Both).
|
||||
const char *s, *o;
|
||||
|
@ -558,13 +589,13 @@ static const TestCase testCases[]={
|
|||
UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|
|
||||
UIDNA_ERROR_HYPHEN_3_4 },
|
||||
{ "a..c", "B", "a..c", UIDNA_ERROR_EMPTY_LABEL },
|
||||
{ "a.xn--.c", "B", "a..c", UIDNA_ERROR_EMPTY_LABEL },
|
||||
{ "a.xn--.c", "B", "a.xn--\\uFFFD.c", UIDNA_ERROR_INVALID_ACE_LABEL },
|
||||
{ "a.-b.", "B", "a.-b.", UIDNA_ERROR_LEADING_HYPHEN },
|
||||
{ "a.b-.c", "B", "a.b-.c", UIDNA_ERROR_TRAILING_HYPHEN },
|
||||
{ "a.-.c", "B", "a.-.c", UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN },
|
||||
{ "a.bc--de.f", "B", "a.bc--de.f", UIDNA_ERROR_HYPHEN_3_4 },
|
||||
{ "\\u00E4.\\u00AD.c", "B", "\\u00E4..c", UIDNA_ERROR_EMPTY_LABEL },
|
||||
{ "\\u00E4.xn--.c", "B", "\\u00E4..c", UIDNA_ERROR_EMPTY_LABEL },
|
||||
{ "\\u00E4.xn--.c", "B", "\\u00E4.xn--\\uFFFD.c", UIDNA_ERROR_INVALID_ACE_LABEL },
|
||||
{ "\\u00E4.-b.", "B", "\\u00E4.-b.", UIDNA_ERROR_LEADING_HYPHEN },
|
||||
{ "\\u00E4.b-.c", "B", "\\u00E4.b-.c", UIDNA_ERROR_TRAILING_HYPHEN },
|
||||
{ "\\u00E4.-.c", "B", "\\u00E4.-.c", UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN },
|
||||
|
|
|
@ -341,6 +341,16 @@ public final class UTS46 extends IDNA {
|
|||
dest.charAt(labelStart+2)=='-' && dest.charAt(labelStart+3)=='-'
|
||||
) {
|
||||
// Label starts with "xn--", try to un-Punycode it.
|
||||
// In IDNA2008, labels like "xn--" (decodes to an empty string) and
|
||||
// "xn--ASCII-" (decodes to just "ASCII") fail the round-trip validation from
|
||||
// comparing the ToUnicode input with the back-to-ToASCII output.
|
||||
// They are alternate encodings of the respective ASCII labels.
|
||||
// Ignore "xn---" here: It will fail Punycode.decode() which logically comes before
|
||||
// the round-trip verification.
|
||||
if(labelLength==4 || (labelLength>5 && dest.charAt(labelStart+labelLength-1)=='-')) {
|
||||
addLabelError(info, Error.INVALID_ACE_LABEL);
|
||||
return markBadACELabel(dest, labelStart, labelLength, toASCII, info);
|
||||
}
|
||||
wasPunycode=true;
|
||||
try {
|
||||
fromPunycode=Punycode.decode(dest.subSequence(labelStart+4, labelStart+labelLength), null);
|
||||
|
@ -496,9 +506,9 @@ public final class UTS46 extends IDNA {
|
|||
boolean disallowNonLDHDot=(options&USE_STD3_RULES)!=0;
|
||||
boolean isASCII=true;
|
||||
boolean onlyLDH=true;
|
||||
int i=labelStart+4; // After the initial "xn--".
|
||||
int limit=labelStart+labelLength;
|
||||
do {
|
||||
// Start after the initial "xn--".
|
||||
for(int i=labelStart+4; i<limit; ++i) {
|
||||
char c=dest.charAt(i);
|
||||
if(c<=0x7f) {
|
||||
if(c=='.') {
|
||||
|
@ -515,7 +525,7 @@ public final class UTS46 extends IDNA {
|
|||
} else {
|
||||
isASCII=onlyLDH=false;
|
||||
}
|
||||
} while(++i<limit);
|
||||
}
|
||||
if(onlyLDH) {
|
||||
dest.insert(labelStart+labelLength, '\ufffd');
|
||||
++labelLength;
|
||||
|
|
|
@ -152,6 +152,28 @@ public class UTS46Test extends TestFmwk {
|
|||
info.getErrors().contains(IDNA.Error.PUNYCODE));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestACELabelEdgeCases() {
|
||||
// In IDNA2008, these labels fail the round-trip validation from comparing
|
||||
// the ToUnicode input with the back-to-ToASCII output.
|
||||
IDNA idna=IDNA.getUTS46Instance(0);
|
||||
StringBuilder result=new StringBuilder();
|
||||
IDNA.Info info=new IDNA.Info();
|
||||
idna.labelToUnicode("xn--", result, info);
|
||||
assertTrue("empty xn--", info.getErrors().contains(IDNA.Error.INVALID_ACE_LABEL));
|
||||
|
||||
info=new IDNA.Info();
|
||||
idna.labelToUnicode("xN--ASCII-", result, info);
|
||||
assertTrue("nothing but ASCII", info.getErrors().contains(IDNA.Error.INVALID_ACE_LABEL));
|
||||
|
||||
// Different error: The Punycode decoding procedure does not consume the last delimiter
|
||||
// if it is right after the xn-- so the main decoding loop fails because the hyphen
|
||||
// is not a valid Punycode digit.
|
||||
info=new IDNA.Info();
|
||||
idna.labelToUnicode("Xn---", result, info);
|
||||
assertTrue("empty Xn---", info.getErrors().contains(IDNA.Error.PUNYCODE));
|
||||
}
|
||||
|
||||
private static final Map<String, IDNA.Error> errorNamesToErrors;
|
||||
static {
|
||||
errorNamesToErrors=new TreeMap<>();
|
||||
|
@ -432,13 +454,13 @@ public class UTS46Test extends TestFmwk {
|
|||
"UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|"+
|
||||
"UIDNA_ERROR_HYPHEN_3_4" },
|
||||
{ "a..c", "B", "a..c", "UIDNA_ERROR_EMPTY_LABEL" },
|
||||
{ "a.xn--.c", "B", "a..c", "UIDNA_ERROR_EMPTY_LABEL" },
|
||||
{ "a.xn--.c", "B", "a.xn--\uFFFD.c", "UIDNA_ERROR_INVALID_ACE_LABEL" },
|
||||
{ "a.-b.", "B", "a.-b.", "UIDNA_ERROR_LEADING_HYPHEN" },
|
||||
{ "a.b-.c", "B", "a.b-.c", "UIDNA_ERROR_TRAILING_HYPHEN" },
|
||||
{ "a.-.c", "B", "a.-.c", "UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN" },
|
||||
{ "a.bc--de.f", "B", "a.bc--de.f", "UIDNA_ERROR_HYPHEN_3_4" },
|
||||
{ "\u00E4.\u00AD.c", "B", "\u00E4..c", "UIDNA_ERROR_EMPTY_LABEL" },
|
||||
{ "\u00E4.xn--.c", "B", "\u00E4..c", "UIDNA_ERROR_EMPTY_LABEL" },
|
||||
{ "\u00E4.xn--.c", "B", "\u00E4.xn--\uFFFD.c", "UIDNA_ERROR_INVALID_ACE_LABEL" },
|
||||
{ "\u00E4.-b.", "B", "\u00E4.-b.", "UIDNA_ERROR_LEADING_HYPHEN" },
|
||||
{ "\u00E4.b-.c", "B", "\u00E4.b-.c", "UIDNA_ERROR_TRAILING_HYPHEN" },
|
||||
{ "\u00E4.-.c", "B", "\u00E4.-.c", "UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN" },
|
||||
|
|
Loading…
Add table
Reference in a new issue