diff --git a/icu4c/source/common/sprpimpl.h b/icu4c/source/common/sprpimpl.h index 2acd3a0bba3..c545fe3e98a 100644 --- a/icu4c/source/common/sprpimpl.h +++ b/icu4c/source/common/sprpimpl.h @@ -77,10 +77,11 @@ void uprv_syntaxError(const UChar* rules, stop = ((pos+U_PARSE_CONTEXT_LEN)<= rulesLen )? (pos+(U_PARSE_CONTEXT_LEN)) : rulesLen; - - u_memcpy(parseError->postContext,rules+start,stop-start); - //null terminate the buffer - parseError->postContext[stop-start]= 0; + if(start < stop){ + u_memcpy(parseError->postContext,rules+start,stop-start); + //null terminate the buffer + parseError->postContext[stop-start]= 0; + } } #endif diff --git a/icu4c/source/common/uidna.cpp b/icu4c/source/common/uidna.cpp index 987777f604b..27d7a338d16 100644 --- a/icu4c/source/common/uidna.cpp +++ b/icu4c/source/common/uidna.cpp @@ -315,7 +315,7 @@ uidna_toUnicode(const UChar* src, int32_t srcLength, //get the options UBool allowUnassigned = (UBool)((options & UIDNA_ALLOW_UNASSIGNED) != 0); - // UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); + UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE]; @@ -332,10 +332,13 @@ uidna_toUnicode(const UChar* src, int32_t srcLength, UBool* caseFlags = NULL; UBool srcIsASCII = TRUE; + UBool srcIsLDH = TRUE; + int32_t failPos =0; if(U_FAILURE(*status)){ goto CLEANUP; } + // step 1: find out if all the codepoints in src are ASCII if(srcLength==-1){ srcLength = 0; @@ -343,6 +346,13 @@ uidna_toUnicode(const UChar* src, int32_t srcLength, if(src[srcLength]> 0x7f){ srcIsASCII = FALSE; } + // here we do not assemble surrogates + // since we know that LDH code points + // are in the ASCII range only + if(prep->isLDHChar(src[srcLength])==FALSE){ + srcIsLDH = FALSE; + failPos = srcLength; + } srcLength++; } }else{ @@ -350,6 +360,13 @@ uidna_toUnicode(const UChar* src, int32_t srcLength, if(src[j]> 0x7f){ srcIsASCII = FALSE; } + // here we do not assemble surrogates + // since we know that LDH code points + // are in the ASCII range only + if(prep->isLDHChar(src[j])==FALSE){ + srcIsLDH = FALSE; + failPos = j; + } } } @@ -379,6 +396,7 @@ uidna_toUnicode(const UChar* src, int32_t srcLength, b1 = (UChar*) src; b1Len = srcLength; } + //step 3: verify ACE Prefix if(startsWithPrefix(src,srcLength)){ @@ -439,6 +457,27 @@ uidna_toUnicode(const UChar* src, int32_t srcLength, uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR); } }else{ + // verify that STD3 ASCII rules are satisfied + if(useSTD3ASCIIRules == TRUE){ + if( srcIsLDH == FALSE /* source contains some non-LDH characters */ + || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ + *status = U_IDNA_STD3_ASCII_RULES_ERROR; + + /* populate the parseError struct */ + if(srcIsLDH==FALSE){ + // failPos is always set the index of failure + uprv_syntaxError(src,failPos, srcLength,parseError); + }else if(src[0] == HYPHEN){ + // fail position is 0 + uprv_syntaxError(src,0,srcLength,parseError); + }else{ + // the last index in the source is always length-1 + uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); + } + + goto CLEANUP; + } + } //copy the source to destination if(srcLength <= destCapacity){ uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); @@ -457,6 +496,20 @@ CLEANUP: uprv_free(caseFlags); delete prep; + + // The RFC states that + // + // ToUnicode never fails. If any step fails, then the original input + // is returned immediately in that step. + // + // So if any step fails lets copy source to destination + if(U_FAILURE(*status)){ + //copy the source to destination + if(srcLength <= destCapacity){ + uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); + } + reqLength = srcLength; + } return u_terminateUChars(dest, destCapacity, reqLength, status); } diff --git a/icu4c/source/test/intltest/idnaref.cpp b/icu4c/source/test/intltest/idnaref.cpp index e0991ae4ae3..7b2f63200e5 100644 --- a/icu4c/source/test/intltest/idnaref.cpp +++ b/icu4c/source/test/intltest/idnaref.cpp @@ -437,12 +437,14 @@ idnaref_toUnicode(const UChar* src, int32_t srcLength, b1Len = 0; UBool* caseFlags = NULL; - UBool srcIsASCII = TRUE; - //get the options UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); + UBool srcIsASCII = TRUE; + UBool srcIsLDH = TRUE; + int32_t failPos =0; + if(U_FAILURE(*status)){ goto CLEANUP; } @@ -453,6 +455,13 @@ idnaref_toUnicode(const UChar* src, int32_t srcLength, if(src[srcLength]> 0x7f){ srcIsASCII = FALSE; } + // here we do not assemble surrogates + // since we know that LDH code points + // are in the ASCII range only + if(prep->isLDHChar(src[srcLength])==FALSE){ + srcIsLDH = FALSE; + failPos = srcLength; + } srcLength++; } }else{ @@ -460,6 +469,13 @@ idnaref_toUnicode(const UChar* src, int32_t srcLength, if(src[j]> 0x7f){ srcIsASCII = FALSE; } + // here we do not assemble surrogates + // since we know that LDH code points + // are in the ASCII range only + if(prep->isLDHChar(src[j])==FALSE){ + srcIsLDH = FALSE; + failPos = j; + } } } @@ -560,6 +576,27 @@ idnaref_toUnicode(const UChar* src, int32_t srcLength, uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR); } }else{ + // verify that STD3 ASCII rules are satisfied + if(useSTD3ASCIIRules == TRUE){ + if( srcIsLDH == FALSE /* source contains some non-LDH characters */ + || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ + *status = U_IDNA_STD3_ASCII_RULES_ERROR; + + /* populate the parseError struct */ + if(srcIsLDH==FALSE){ + // failPos is always set the index of failure + uprv_syntaxError(src,failPos, srcLength,parseError); + }else if(src[0] == HYPHEN){ + // fail position is 0 + uprv_syntaxError(src,0,srcLength,parseError); + }else{ + // the last index in the source is always length-1 + uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); + } + + goto CLEANUP; + } + } //copy the source to destination if(srcLength <= destCapacity){ uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); diff --git a/icu4c/source/test/intltest/testidna.cpp b/icu4c/source/test/intltest/testidna.cpp index 103e409017c..27e48dab3f0 100644 --- a/icu4c/source/test/intltest/testidna.cpp +++ b/icu4c/source/test/intltest/testidna.cpp @@ -340,7 +340,7 @@ static struct ErrorCases{ U_IDNA_CHECK_BIDI_ERROR, FALSE, TRUE, TRUE }, - /* + { { 0x0077, 0x0077, 0x0077, 0x002e, // www. @@ -348,11 +348,43 @@ static struct ErrorCases{ 0x002e, 0x0063, 0x006f, 0x006d, // com. 0x0000 }, - "www.xn--989AoMsVi.com", - U_ZERO_ERROR, + "www.XN--ghbgi278xia.com", + U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR, FALSE, TRUE, TRUE }, - */ + { + { + 0x0077, 0x0077, 0x0077, 0x002e, // www. + 0x002D, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, // HYPHEN at the start of label + 0x002e, 0x0063, 0x006f, 0x006d, // com. + 0x0000 + }, + "www.-abcde.com", + U_IDNA_STD3_ASCII_RULES_ERROR, + TRUE, TRUE, FALSE + }, + { + { + 0x0077, 0x0077, 0x0077, 0x002e, // www. + 0x0041, 0x0042, 0x0043, 0x0044, 0x0045,0x002D, // HYPHEN at the end of the label + 0x002e, 0x0063, 0x006f, 0x006d, // com. + 0x0000 + }, + "www.abcde-.com", + U_IDNA_STD3_ASCII_RULES_ERROR, + TRUE, TRUE, FALSE + }, + { + { + 0x0077, 0x0077, 0x0077, 0x002e, // www. + 0x0041, 0x0042, 0x0043, 0x0044, 0x0045,0x0040, // Containing non LDH code point + 0x002e, 0x0063, 0x006f, 0x006d, // com. + 0x0000 + }, + "www.abcde@.com", + U_IDNA_STD3_ASCII_RULES_ERROR, + TRUE, TRUE, FALSE + }, }; @@ -558,87 +590,135 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test int32_t expectedLen = u_strlen(expected); int32_t options = (useSTD3ASCIIRules == TRUE) ? UIDNA_USE_STD3_RULES : UIDNA_DEFAULT; UParseError parseError; + int32_t tSrcLen = u_strlen(src); + UChar* tSrc = (UChar*) uprv_malloc( U_SIZEOF_UCHAR * tSrcLen ); + + uprv_memcpy(tSrc,src,tSrcLen * U_SIZEOF_UCHAR); // test null-terminated source and return value of number of UChars required - destLen = func(src,-1,dest,0,options, &parseError , &status); - if(status == U_BUFFER_OVERFLOW_ERROR){ - status = U_ZERO_ERROR; // reset error code - if(destLen+1 < MAX_DEST_SIZE){ - dest = destStack; - destLen = func(src,-1,dest,destLen+1,options, &parseError, &status); - // TODO : compare output with expected - if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){ - errln("Did not get the expected result for "+UnicodeString(testName) +" null terminated source. Expected : " - + prettify(UnicodeString(expected,expectedLen)) - + " Got: " + prettify(UnicodeString(dest,destLen)) - ); - } - }else{ - errln( "%s null terminated source failed. Requires destCapacity > 300\n",testName); - } - } - - if(status != expectedStatus){ - errln( "Did not get the expected error for %s null terminated source failed. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status)); - } - if(testUnassigned ){ - status = U_ZERO_ERROR; - destLen = func(src,-1,dest,0,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status); + if( expectedStatus != U_IDNA_STD3_ASCII_RULES_ERROR ){ + destLen = func(src,-1,dest,0,options, &parseError , &status); if(status == U_BUFFER_OVERFLOW_ERROR){ status = U_ZERO_ERROR; // reset error code if(destLen+1 < MAX_DEST_SIZE){ dest = destStack; - destLen = func(src,-1,dest,destLen+1,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status); + destLen = func(src,-1,dest,destLen+1,options, &parseError, &status); + // TODO : compare output with expected + if(U_SUCCESS(status) && expectedStatus != U_IDNA_STD3_ASCII_RULES_ERROR&& (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){ + errln("Did not get the expected result for "+UnicodeString(testName) +" null terminated source. Expected : " + + prettify(UnicodeString(expected,expectedLen)) + + " Got: " + prettify(UnicodeString(dest,destLen)) + ); + } + }else{ + errln( "%s null terminated source failed. Requires destCapacity > 300\n",testName); + } + } + + if(status != expectedStatus){ + errln( "Did not get the expected error for %s null terminated source failed. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status)); + } + if(testUnassigned ){ + status = U_ZERO_ERROR; + destLen = func(src,-1,dest,0,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status); + if(status == U_BUFFER_OVERFLOW_ERROR){ + status = U_ZERO_ERROR; // reset error code + if(destLen+1 < MAX_DEST_SIZE){ + dest = destStack; + destLen = func(src,-1,dest,destLen+1,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status); + // TODO : compare output with expected + if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){ + //errln("Did not get the expected result for %s null terminated source with both options set.\n",testName); + errln("Did not get the expected result for "+UnicodeString(testName) +" null terminated source with both options set. Expected: "+ prettify(UnicodeString(expected,expectedLen))); + + } + }else{ + errln( "%s null terminated source failed. Requires destCapacity > 300\n",testName); + } + } + //testing query string + if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR){ + errln( "Did not get the expected error for %s null terminated source with options set. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status)); + } + } + + status = U_ZERO_ERROR; + + // test source with lengthand return value of number of UChars required + destLen = func(tSrc, tSrcLen, dest,0,options, &parseError, &status); + if(status == U_BUFFER_OVERFLOW_ERROR){ + status = U_ZERO_ERROR; // reset error code + if(destLen+1 < MAX_DEST_SIZE){ + dest = destStack; + destLen = func(src,u_strlen(src),dest,destLen+1,options, &parseError, &status); + // TODO : compare output with expected + if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){ + errln("Did not get the expected result for %s with source length.\n",testName); + } + }else{ + errln( "%s with source length failed. Requires destCapacity > 300\n",testName); + } + } + + if(status != expectedStatus){ + errln( "Did not get the expected error for %s with source length. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status)); + } + if(testUnassigned){ + status = U_ZERO_ERROR; + + destLen = func(tSrc,tSrcLen,dest,0,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status); + + if(status == U_BUFFER_OVERFLOW_ERROR){ + status = U_ZERO_ERROR; // reset error code + if(destLen+1 < MAX_DEST_SIZE){ + dest = destStack; + destLen = func(src,u_strlen(src),dest,destLen+1,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status); + // TODO : compare output with expected + if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){ + errln("Did not get the expected result for %s with source length and both options set.\n",testName); + } + }else{ + errln( "%s with source length failed. Requires destCapacity > 300\n",testName); + } + } + //testing query string + if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR){ + errln( "Did not get the expected error for %s with source length and options set. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status)); + } + } + }else{ + + status = U_ZERO_ERROR; + destLen = func(src,-1,dest,0,options | UIDNA_USE_STD3_RULES, &parseError, &status); + if(status == U_BUFFER_OVERFLOW_ERROR){ + status = U_ZERO_ERROR; // reset error code + if(destLen+1 < MAX_DEST_SIZE){ + dest = destStack; + destLen = func(src,-1,dest,destLen+1,options | UIDNA_USE_STD3_RULES, &parseError, &status); // TODO : compare output with expected if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){ //errln("Did not get the expected result for %s null terminated source with both options set.\n",testName); errln("Did not get the expected result for "+UnicodeString(testName) +" null terminated source with both options set. Expected: "+ prettify(UnicodeString(expected,expectedLen))); - + } }else{ errln( "%s null terminated source failed. Requires destCapacity > 300\n",testName); } } //testing query string - if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR){ + if(status != expectedStatus){ errln( "Did not get the expected error for %s null terminated source with options set. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status)); - } - } + } - status = U_ZERO_ERROR; - int32_t tSrcLen = u_strlen(src); - UChar* tSrc = (UChar*) uprv_malloc( U_SIZEOF_UCHAR * tSrcLen ); - uprv_memcpy(tSrc,src,tSrcLen * U_SIZEOF_UCHAR); - - // test source with lengthand return value of number of UChars required - destLen = func(tSrc, tSrcLen, dest,0,options, &parseError, &status); - if(status == U_BUFFER_OVERFLOW_ERROR){ - status = U_ZERO_ERROR; // reset error code - if(destLen+1 < MAX_DEST_SIZE){ - dest = destStack; - destLen = func(src,u_strlen(src),dest,destLen+1,options, &parseError, &status); - // TODO : compare output with expected - if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){ - errln("Did not get the expected result for %s with source length.\n",testName); - } - }else{ - errln( "%s with source length failed. Requires destCapacity > 300\n",testName); - } - } - - if(status != expectedStatus){ - errln( "Did not get the expected error for %s with source length. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status)); - } - if(testUnassigned){ status = U_ZERO_ERROR; - destLen = func(tSrc,tSrcLen,dest,0,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status); + destLen = func(tSrc,tSrcLen,dest,0,options | UIDNA_USE_STD3_RULES, &parseError, &status); if(status == U_BUFFER_OVERFLOW_ERROR){ status = U_ZERO_ERROR; // reset error code if(destLen+1 < MAX_DEST_SIZE){ dest = destStack; - destLen = func(src,u_strlen(src),dest,destLen+1,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status); + destLen = func(src,u_strlen(src),dest,destLen+1,options | UIDNA_USE_STD3_RULES, &parseError, &status); // TODO : compare output with expected if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){ errln("Did not get the expected result for %s with source length and both options set.\n",testName);