mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 05:55:35 +00:00
ICU-2194 IDNA API
X-SVN-Rev: 11194
This commit is contained in:
parent
78f36c9a5a
commit
7da935c904
13 changed files with 2877 additions and 2 deletions
|
@ -66,7 +66,8 @@ brkiter.o brkdict.o ubrk.o dbbi.o dbbi_tbl.o \
|
|||
rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \
|
||||
utrie.o uset.o cmemory.o caniter.o \
|
||||
unifilt.o unifunct.o uniset.o usetiter.o util.o uenum.o \
|
||||
icuserv.o iculserv.o icunotif.o ustrenum.o
|
||||
icuserv.o iculserv.o icunotif.o ustrenum.o \
|
||||
uidna.o strprep.o nameprep.o punycode.o
|
||||
|
||||
STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O))
|
||||
|
||||
|
|
|
@ -3244,6 +3244,65 @@ InputPath=.\unicode\utf_old.h
|
|||
|
||||
!ENDIF
|
||||
|
||||
# End Source File
|
||||
# End Group
|
||||
# Begin Group "idna"
|
||||
|
||||
# PROP Default_Filter "*.c,*.h"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\nameprep.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\nameprep.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\punycode.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\punycode.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\sprpimpl.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\strprep.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\strprep.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\uidna.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\unicode\uidna.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\unicode\uidna.h
|
||||
|
||||
"..\..\include\unicode\uidna.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(InputPath) ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win64 Release"
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win64 Debug"
|
||||
|
||||
!ENDIF
|
||||
|
||||
# End Source File
|
||||
# End Group
|
||||
# End Target
|
||||
|
|
41
icu4c/source/common/nameprep.cpp
Normal file
41
icu4c/source/common/nameprep.cpp
Normal file
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: nameprep.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003feb1
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
|
||||
#include "nameprep.h"
|
||||
// *****************************************************************************
|
||||
// class NamePrep
|
||||
// *****************************************************************************
|
||||
static const UChar ASCII_SPACE = 0x0020;
|
||||
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
const char NamePrep::fgClassID=0;
|
||||
|
||||
// default constructor
|
||||
NamePrep::NamePrep(UErrorCode& status){
|
||||
bidiCheck = TRUE;
|
||||
doNFKC = TRUE;
|
||||
}
|
||||
|
||||
// Answers TRUE only for U+0020 (ASCII space); any other code point gives FALSE.
UBool NamePrep::isNotProhibited(UChar32 ch)
{
    if(ch != ASCII_SPACE) {
        return FALSE;
    }
    return TRUE;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
|
||||
|
97
icu4c/source/common/nameprep.h
Normal file
97
icu4c/source/common/nameprep.h
Normal file
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: nameprep.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003feb1
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
|
||||
#ifndef NAMEPREP_H
|
||||
#define NAMEPREP_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "strprep.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
/*
|
||||
A profile of stringprep MUST include all of the following:
|
||||
|
||||
- The intended applicability of the profile
|
||||
|
||||
- The character repertoire that is the input and output to stringprep
|
||||
(which is Unicode 3.2 for this version of stringprep)
|
||||
|
||||
- The mapping tables from this document used (as described in section
|
||||
3)
|
||||
|
||||
- Any additional mapping tables specific to the profile
|
||||
|
||||
- The Unicode normalization used, if any (as described in section 4)
|
||||
|
||||
- The tables from this document of characters that are prohibited as
|
||||
output (as described in section 5)
|
||||
|
||||
- The bidirectional string testing used, if any (as described in
|
||||
section 6)
|
||||
|
||||
- Any additional characters that are prohibited as output specific to
|
||||
the profile
|
||||
*/
|
||||
|
||||
|
||||
class NamePrep: public StringPrep {
|
||||
public :
|
||||
NamePrep(UErrorCode& status);
|
||||
|
||||
virtual inline ~NamePrep(){};
|
||||
|
||||
virtual inline UBool isNotProhibited(UChar32 ch);
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for the actual class.
|
||||
*
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for this class.
|
||||
*
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
|
||||
|
||||
private:
|
||||
/**
|
||||
* The address of this static class variable serves as this class's ID
|
||||
* for ICU "poor man's RTTI".
|
||||
*/
|
||||
static const char fgClassID;
|
||||
};
|
||||
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
||||
|
||||
|
563
icu4c/source/common/punycode.c
Normal file
563
icu4c/source/common/punycode.c
Normal file
|
@ -0,0 +1,563 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: punycode.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2002jan31
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
|
||||
/* This ICU code derived from: */
|
||||
/*
|
||||
punycode.c 0.4.0 (2001-Nov-17-Sat)
|
||||
http://www.cs.berkeley.edu/~amc/idn/
|
||||
Adam M. Costello
|
||||
http://www.nicemice.net/amc/
|
||||
*/
|
||||
/*
|
||||
* ICU modifications:
|
||||
* - ICU data types and coding conventions
|
||||
* - ICU string buffer handling with implicit source lengths
|
||||
* and destination preflighting
|
||||
* - UTF-16 handling
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "cstring.h"
|
||||
#include "cmemory.h"
|
||||
#include "punycode.h"
|
||||
#include "unicode/ustring.h"
|
||||
|
||||
|
||||
/* Punycode ----------------------------------------------------------------- */
|
||||
|
||||
/* Bootstring parameter set that defines Punycode */
#define BASE            36
#define TMIN            1
#define TMAX            26
#define SKEW            38
#define DAMP            700
#define INITIAL_BIAS    72
#define INITIAL_N       0x80

/* ASCII ("basic") code points referenced by the encoder and decoder */
#define _HYPHEN         0x2d
#define DELIMITER       _HYPHEN

#define _ZERO           0x30
#define _NINE           0x39

#define _SMALL_A        0x61
#define _SMALL_Z        0x7a

#define _CAPITAL_A      0x41
#define _CAPITAL_Z      0x5a

/* a code point is "basic" when it is below 0x80 */
#define IS_BASIC(c) ((c)<0x80)
/* true for the code points _CAPITAL_A.._CAPITAL_Z */
#define IS_BASIC_UPPERCASE(c) (_CAPITAL_A<=(c) && (c)<=_CAPITAL_Z)
|
||||
|
||||
/**
 * Converts a digit value into the basic code point that represents it.
 * The digit must lie in 0..BASE-1:
 *   0..25  become a letter (A..Z when uppercase is nonzero, a..z otherwise),
 *   26..35 become the ASCII digits 0..9 (the uppercase flag has no effect).
 */
U_INLINE char
digitToBasic(int32_t digit, UBool uppercase) {
    int32_t first;

    if(digit>=26) {
        /* digit values 26..35: offset so that 26 lands on '0' */
        first=_ZERO-26;
    } else if(uppercase) {
        first=_CAPITAL_A;
    } else {
        first=_SMALL_A;
    }

    return (char)(first+digit);
}
|
||||
|
||||
/**
 * basicToDigit[] contains the numeric value of a basic code
 * point (for use in representing integers) in the range 0 to
 * BASE-1, or -1 if b does not represent a value.
 *
 * Letters map to 0..25 regardless of case, the digits '0'..'9' map to
 * 26..35. The table is only ever read, so it is declared const.
 */
static const int8_t
basicToDigit[256]={
    /* 0x00..0x1f */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

    /* 0x20..0x3f: '0'..'9' start at 0x30 */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,

    /* 0x40..0x5f: 'A'..'Z' start at 0x41 */
    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

    /* 0x60..0x7f: 'a'..'z' start at 0x61 */
    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

    /* 0x80..0xff: never a digit */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
|
||||
|
||||
U_INLINE char
|
||||
asciiCaseMap(char b, UBool uppercase) {
|
||||
if(uppercase) {
|
||||
if(_SMALL_A<=b && b<=_SMALL_Z) {
|
||||
b-=(_SMALL_A-_CAPITAL_A);
|
||||
}
|
||||
} else {
|
||||
if(_CAPITAL_A<=b && b<=_CAPITAL_Z) {
|
||||
b+=(_SMALL_A-_CAPITAL_A);
|
||||
}
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
/* Punycode-specific Bootstring code ---------------------------------------- */
|
||||
|
||||
/*
|
||||
* The following code omits the {parts} of the pseudo-algorithm in the spec
|
||||
* that are not used with the Punycode parameter set.
|
||||
*/
|
||||
|
||||
/*
 * Bias adaptation.
 * delta is first scaled down (by DAMP on the first call, by 2 afterwards),
 * increased by delta/length, and then divided by (BASE-TMIN) until it is
 * no larger than ((BASE-TMIN)*TMAX)/2; each division adds BASE to the result.
 */
static int32_t
adaptBias(int32_t delta, int32_t length, UBool firstTime) {
    int32_t steps=0;

    delta/= firstTime ? DAMP : 2;
    delta+=delta/length;

    while(delta>((BASE-TMIN)*TMAX)/2) {
        delta/=(BASE-TMIN);
        steps+=BASE;
    }

    return steps+(((BASE-TMIN+1)*delta)/(delta+SKEW));
}
|
||||
|
||||
#define MAX_CP_COUNT 200
|
||||
|
||||
/*
 * Encodes the UTF-16 string src as Punycode into dest.
 *
 * src/srcLength   input; srcLength==-1 means NUL-terminated
 * dest/capacity   output buffer; may be NULL with capacity 0 (preflighting)
 * caseFlags       optional, indexed by input UChar; NULL means no case mapping
 * pErrorCode      U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
 *                 U_INDEX_OUTOFBOUNDS_ERROR for more than MAX_CP_COUNT code points,
 *                 U_INVALID_CHAR_FOUND for an unpaired surrogate,
 *                 U_INTERNAL_PROGRAM_ERROR if delta would overflow
 *
 * Output units are stored only while they fit into dest, but destLength
 * counts every unit so that u_terminateUChars() receives the full output
 * length even when dest is too small.
 */
U_CFUNC int32_t
u_strToPunycode(const UChar *src, int32_t srcLength,
                UChar *dest, int32_t destCapacity,
                const UBool *caseFlags,
                UErrorCode *pErrorCode) {

    int32_t cpBuffer[MAX_CP_COUNT];
    int32_t n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount;
    UChar c, c2;

    /* argument checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /*
     * Handle the basic code points and
     * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit):
     */
    srcCPCount=destLength=0;
    if(srcLength==-1) {
        /* NUL-terminated input */
        for(j=0; /* no condition */; ++j) {
            if((c=src[j])==0) {
                break;
            }
            if(srcCPCount==MAX_CP_COUNT) {
                /* too many input code points */
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
            if(IS_BASIC(c)) {
                cpBuffer[srcCPCount++]=0;
                if(destLength<destCapacity) {
                    dest[destLength]=
                        caseFlags!=NULL ?
                            asciiCaseMap((char)c, caseFlags[j]) :
                            (char)c;
                }
                ++destLength;
            } else {
                n=(caseFlags!=NULL && caseFlags[j])<<31L;
                if(UTF_IS_SINGLE(c)) {
                    n|=c;
                } else if(UTF_IS_LEAD(c) && UTF_IS_TRAIL(c2=src[j+1])) {
                    ++j;
                    n|=(int32_t)UTF16_GET_PAIR_VALUE(c, c2);
                } else {
                    /* error: unmatched surrogate */
                    *pErrorCode=U_INVALID_CHAR_FOUND;
                    return 0;
                }
                cpBuffer[srcCPCount++]=n;
            }
        }
    } else {
        /* length-specified input */
        for(j=0; j<srcLength; ++j) {
            if(srcCPCount==MAX_CP_COUNT) {
                /* too many input code points */
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
            c=src[j];
            if(IS_BASIC(c)) {
                /*
                 * Record the code point unconditionally, exactly as the
                 * NUL-terminated branch does: srcCPCount must count every
                 * input code point even when dest is full or absent,
                 * otherwise it disagrees with basicLength below.
                 */
                cpBuffer[srcCPCount++]=0;
                if(destLength<destCapacity) {
                    dest[destLength]=
                        caseFlags!=NULL ?
                            asciiCaseMap((char)c, caseFlags[j]) :
                            (char)c;
                }
                ++destLength;
            } else {
                n=(caseFlags!=NULL && caseFlags[j])<<31L;
                if(UTF_IS_SINGLE(c)) {
                    n|=c;
                } else if(UTF_IS_LEAD(c) && (j+1)<srcLength && UTF_IS_TRAIL(c2=src[j+1])) {
                    ++j;
                    n|=(int32_t)UTF16_GET_PAIR_VALUE(c, c2);
                } else {
                    /* error: unmatched surrogate */
                    *pErrorCode=U_INVALID_CHAR_FOUND;
                    return 0;
                }
                cpBuffer[srcCPCount++]=n;
            }
        }
    }

    /* Finish the basic string - if it is not empty - with a delimiter. */
    basicLength=destLength;
    if(basicLength>0) {
        if(destLength<destCapacity) {
            dest[destLength]=DELIMITER;
        }
        ++destLength;
    }

    /*
     * handledCPCount is the number of code points that have been handled
     * basicLength is the number of basic code points
     * destLength is the number of chars that have been output
     */

    /* Initialize the state: */
    n=INITIAL_N;
    delta=0;
    bias=INITIAL_BIAS;

    /* Main encoding loop: */
    for(handledCPCount=basicLength; handledCPCount<srcCPCount; /* no op */) {
        /*
         * All non-basic code points < n have been handled already.
         * Find the next larger one:
         */
        for(m=0x7fffffff, j=0; j<srcCPCount; ++j) {
            q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
            if(n<=q && q<m) {
                m=q;
            }
        }

        /*
         * Increase delta enough to advance the decoder's
         * <n,i> state to <m,0>, but guard against overflow:
         */
        if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) {
            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
            return 0;
        }
        delta+=(m-n)*(handledCPCount+1);
        n=m;

        /* Encode a sequence of same code points n */
        for(j=0; j<srcCPCount; ++j) {
            q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
            if(q<n) {
                ++delta;
            } else if(q==n) {
                /* Represent delta as a generalized variable-length integer: */
                for(q=delta, k=BASE; /* no condition */; k+=BASE) {

                    /* threshold t=k-bias, clamped to TMIN..TMAX */
                    t=k-bias;
                    if(t<TMIN) {
                        t=TMIN;
                    } else if(k>=(bias+TMAX)) {
                        t=TMAX;
                    }

                    if(q<t) {
                        break;
                    }

                    /* store only if there is room, but always count the unit */
                    if(destLength<destCapacity) {
                        dest[destLength]=digitToBasic(t+(q-t)%(BASE-t), 0);
                    }
                    ++destLength;
                    q=(q-t)/(BASE-t);
                }

                /* last digit carries the case flag; same store/count rule */
                if(destLength<destCapacity) {
                    dest[destLength]=digitToBasic(q, (UBool)(cpBuffer[j]<0));
                }
                ++destLength;
                bias=adaptBias(delta, handledCPCount+1, (UBool)(handledCPCount==basicLength));
                delta=0;
                ++handledCPCount;
            }
        }

        ++delta;
        ++n;
    }

    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}
|
||||
|
||||
/*
 * Decodes the Punycode string src into UTF-16 in dest.
 *
 * src/srcLength   input; srcLength==-1 means NUL-terminated
 * dest/capacity   output buffer; may be NULL with capacity 0 (preflighting)
 * caseFlags       optional output, one flag per UChar written to dest
 * pErrorCode      U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
 *                 U_INVALID_CHAR_FOUND for a non-ASCII unit or a unit that is
 *                 not a Punycode digit,
 *                 U_ILLEGAL_CHAR_FOUND for a truncated or overflowing delta
 *                 sequence or a result that is not a valid code point
 *
 * Output is stored only while it fits into dest; destLength always counts
 * the full output so that u_terminateUChars() can report it.
 */
U_CFUNC int32_t
u_strFromPunycode(const UChar *src, int32_t srcLength,
                  UChar *dest, int32_t destCapacity,
                  UBool *caseFlags,
                  UErrorCode *pErrorCode) {
    int32_t n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
            destCPCount, firstSupplementaryIndex, cpLength;
    UChar b;

    /* argument checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    if(srcLength==-1) {
        srcLength=u_strlen(src);
    }

    /*
     * Handle the basic code points:
     * Let basicLength be the number of input code points
     * before the last delimiter, or 0 if there is none,
     * then copy the first basicLength code points to the output.
     *
     * The two following loops iterate backward.
     */
    for(j=srcLength; j>0;) {
        if(src[--j]==DELIMITER) {
            break;
        }
    }
    destLength=basicLength=destCPCount=j;

    while(j>0) {
        b=src[--j];
        if(!IS_BASIC(b)) {
            *pErrorCode=U_INVALID_CHAR_FOUND;
            return 0;
        }

        if(j<destCapacity) {
            dest[j]=(UChar)b;

            if(caseFlags!=NULL) {
                caseFlags[j]=IS_BASIC_UPPERCASE(b);
            }
        }
    }

    /* Initialize the state: */
    n=INITIAL_N;
    i=0;
    bias=INITIAL_BIAS;
    firstSupplementaryIndex=1000000000;

    /*
     * Main decoding loop:
     * Start just after the last delimiter if any
     * basic code points were copied; start at the beginning otherwise.
     */
    for(in=basicLength>0 ? basicLength+1 : 0; in<srcLength; /* no op */) {
        /*
         * in is the index of the next character to be consumed, and
         * destCPCount is the number of code points in the output array.
         *
         * Decode a generalized variable-length integer into delta,
         * which gets added to i.  The overflow checking is easier
         * if we increase i as we go, then subtract off its starting
         * value at the end to obtain delta.
         */
        for(oldi=i, w=1, k=BASE; /* no condition */; k+=BASE) {
            if(in>=srcLength) {
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }

            /*
             * Reject non-ASCII units before the table lookup: casting a
             * UChar to uint8_t would silently truncate it (U+0161 would
             * be read as 'a') and accept malformed input.
             */
            b=src[in++];
            if(!IS_BASIC(b)) {
                *pErrorCode=U_INVALID_CHAR_FOUND;
                return 0;
            }

            digit=basicToDigit[(uint8_t)b];
            if(digit<0) {
                *pErrorCode=U_INVALID_CHAR_FOUND;
                return 0;
            }
            if(digit>(0x7fffffff-i)/w) {
                /* integer overflow */
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }

            i+=digit*w;

            /* threshold t=k-bias, clamped to TMIN..TMAX */
            t=k-bias;
            if(t<TMIN) {
                t=TMIN;
            } else if(k>=(bias+TMAX)) {
                t=TMAX;
            }
            if(digit<t) {
                break;
            }

            if(w>0x7fffffff/(BASE-t)) {
                /* integer overflow */
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }
            w*=BASE-t;
        }

        /*
         * Modification from sample code:
         * Increments destCPCount here,
         * where needed instead of in for() loop tail.
         */
        ++destCPCount;
        bias=adaptBias(i-oldi, destCPCount, (UBool)(oldi==0));

        /*
         * i was supposed to wrap around from (incremented) destCPCount to 0,
         * incrementing n each time, so we'll fix that now:
         */
        if(i/destCPCount>(0x7fffffff-n)) {
            /* integer overflow */
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
            return 0;
        }

        n+=i/destCPCount;
        i%=destCPCount;
        /* not needed for Punycode: */
        /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */

        if(n>0x10ffff || UTF_IS_SURROGATE(n)) {
            /* Unicode code point overflow */
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
            return 0;
        }

        /* Insert n at position i of the output: */
        cpLength=UTF_CHAR_LENGTH(n);
        /*
         * "<=": the code point fits when it ends exactly at destCapacity.
         * The terminating NUL is not counted here; u_terminateUChars()
         * deals with it.
         */
        if((destLength+cpLength)<=destCapacity) {
            int32_t codeUnitIndex;

            /*
             * Handle indexes when supplementary code points are present.
             *
             * In almost all cases, there will be only BMP code points before i
             * and even in the entire string.
             * This is handled with the same efficiency as with UTF-32.
             *
             * Only the rare cases with supplementary code points are handled
             * more slowly - but not too bad since this is an insertion anyway.
             */
            if(i<=firstSupplementaryIndex) {
                codeUnitIndex=i;
                if(cpLength>1) {
                    firstSupplementaryIndex=codeUnitIndex;
                } else {
                    ++firstSupplementaryIndex;
                }
            } else {
                codeUnitIndex=firstSupplementaryIndex;
                UTF_FWD_N(dest, codeUnitIndex, destLength, i-codeUnitIndex);
            }

            /* use the UChar index codeUnitIndex instead of the code point index i */
            if(codeUnitIndex<destLength) {
                uprv_memmove(dest+codeUnitIndex+cpLength,
                             dest+codeUnitIndex,
                             (destLength-codeUnitIndex)*U_SIZEOF_UCHAR);
                if(caseFlags!=NULL) {
                    uprv_memmove(caseFlags+codeUnitIndex+cpLength,
                                 caseFlags+codeUnitIndex,
                                 destLength-codeUnitIndex);
                }
            }
            if(cpLength==1) {
                /* BMP, insert one code unit */
                dest[codeUnitIndex]=(UChar)n;
            } else {
                /* supplementary character, insert two code units */
                dest[codeUnitIndex]=UTF16_LEAD(n);
                dest[codeUnitIndex+1]=UTF16_TRAIL(n);
            }
            if(caseFlags!=NULL) {
                /* Case of last character determines uppercase flag: */
                caseFlags[codeUnitIndex]=IS_BASIC_UPPERCASE(src[in-1]);
                if(cpLength==2) {
                    caseFlags[codeUnitIndex+1]=FALSE;
                }
            }
        }
        destLength+=cpLength;
        ++i;
    }

    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}
|
||||
|
||||
/* ### check notes on overflow handling - only necessary if not IDNA? are these Punycode functions to be public? */
|
115
icu4c/source/common/punycode.h
Normal file
115
icu4c/source/common/punycode.h
Normal file
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: punycode.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2002jan31
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
/* This ICU code derived from: */
|
||||
/*
|
||||
punycode.c 0.4.0 (2001-Nov-17-Sat)
|
||||
http://www.cs.berkeley.edu/~amc/idn/
|
||||
Adam M. Costello
|
||||
http://www.nicemice.net/amc/
|
||||
*/
|
||||
|
||||
#ifndef __PUNYCODE_H__
|
||||
#define __PUNYCODE_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* u_strToPunycode() converts Unicode to Punycode.
|
||||
*
|
||||
* The input string must not contain single, unpaired surrogates.
|
||||
* The output will be represented as an array of ASCII code points.
|
||||
*
|
||||
* The output string is NUL-terminated according to normal ICU
|
||||
* string output rules.
|
||||
*
|
||||
* @param src Input Unicode string.
|
||||
* This function handles a limited amount of code points
|
||||
* (the limit is >=64).
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR is set if the limit is exceeded.
|
||||
* @param srcLength Number of UChars in src, or -1 if NUL-terminated.
|
||||
* @param dest Output Punycode array.
|
||||
* @param destCapacity Size of dest.
|
||||
* @param caseFlags Vector of boolean values, one per input UChar,
|
||||
* indicating that the corresponding character is to be
|
||||
* marked for the decoder optionally
|
||||
* uppercasing (TRUE) or lowercasing (FALSE)
|
||||
* the character.
|
||||
* ASCII characters are output directly in the case as marked.
|
||||
* Flags corresponding to trail surrogates are ignored.
|
||||
* If caseFlags==NULL then input characters are not
|
||||
* case-mapped.
|
||||
* @param pErrorCode ICU in/out error code parameter.
|
||||
* U_INVALID_CHAR_FOUND if src contains
|
||||
* unmatched single surrogates.
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR if src contains
|
||||
* too many code points.
|
||||
* @return Number of ASCII characters in the Punycode output.
|
||||
*
|
||||
* @see u_strFromPunycode
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
u_strToPunycode(const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UBool *caseFlags,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* u_strFromPunycode() converts Punycode to Unicode.
|
||||
* The Unicode string will be at most as long (in UChars)
|
||||
* as the Punycode string (in chars).
|
||||
*
|
||||
* @param src Input Punycode string.
|
||||
* @param srcLength Length of src, or -1 if NUL-terminated
|
||||
* @param dest Output Unicode string buffer.
|
||||
* @param destCapacity Size of dest in number of UChars,
|
||||
* and of caseFlags in numbers of UBools.
|
||||
* @param caseFlags Output array for case flags as
|
||||
* defined by the Punycode string.
|
||||
* The caller should uppercase (TRUE) or lowercase (FALSE)
|
||||
* the corresponding character in dest.
|
||||
* For supplementary characters, only the lead surrogate
|
||||
* is marked, and FALSE is stored for the trail surrogate.
|
||||
* This is redundant and not necessary for ASCII characters
|
||||
* because they are already in the case indicated.
|
||||
* Can be NULL if the case flags are not needed.
|
||||
* @param pErrorCode ICU in/out error code parameter.
|
||||
* U_INVALID_CHAR_FOUND if a non-ASCII character
|
||||
* precedes the last delimiter ('-'),
|
||||
* or if an invalid character (not a-zA-Z0-9) is found
|
||||
* after the last delimiter.
|
||||
* U_ILLEGAL_CHAR_FOUND if the delta sequence is ill-formed.
|
||||
* @return Number of UChars written to dest.
|
||||
*
|
||||
* @see u_strToPunycode
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
u_strFromPunycode(const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
UBool *caseFlags,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
||||
|
|
@ -1761,6 +1761,18 @@ _uRegexErrorName[U_REGEX_ERROR_LIMIT - U_REGEX_ERROR_START] = {
|
|||
"U_REGEX_INVALID_FLAG"
|
||||
};
|
||||
|
||||
static const char * const
|
||||
_uIDNAErrorName[U_IDNA_ERROR_LIMIT - U_IDNA_ERROR_START] = {
|
||||
"U_IDNA_ERROR_START",
|
||||
"U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR",
|
||||
"U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR",
|
||||
"U_IDNA_CHECK_BIDI_ERROR",
|
||||
"U_IDNA_STD3_ASCII_RULES_ERROR",
|
||||
"U_IDNA_ACE_PREFIX_ERROR",
|
||||
"U_IDNA_VERIFICATION_ERROR",
|
||||
"U_IDNA_LABEL_TOO_LONG_ERROR"
|
||||
};
|
||||
|
||||
U_CAPI const char * U_EXPORT2
|
||||
u_errorName(UErrorCode code) {
|
||||
if(U_ZERO_ERROR <= code && code < U_STANDARD_ERROR_LIMIT) {
|
||||
|
@ -1775,6 +1787,8 @@ u_errorName(UErrorCode code) {
|
|||
return _uBrkErrorName[code - U_BRK_ERROR_START];
|
||||
} else if (U_REGEX_ERROR_START <= code && code < U_REGEX_ERROR_LIMIT) {
|
||||
return _uRegexErrorName[code - U_REGEX_ERROR_START];
|
||||
} else if(U_IDNA_ERROR_START <= code && code < U_IDNA_ERROR_LIMIT) {
    /*
     * U_IDNA_ERROR_LIMIT is exclusive: _uIDNAErrorName has exactly
     * (U_IDNA_ERROR_LIMIT - U_IDNA_ERROR_START) entries, so accepting
     * code == U_IDNA_ERROR_LIMIT would read one element past the end.
     */
    return _uIDNAErrorName[code - U_IDNA_ERROR_START];
|
||||
} else {
|
||||
return "[BOGUS UErrorCode]";
|
||||
}
|
||||
|
|
65
icu4c/source/common/sprpimpl.h
Normal file
65
icu4c/source/common/sprpimpl.h
Normal file
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: sprpimpl.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003feb1
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
|
||||
#ifndef SPRPIMPL_H
|
||||
#define SPRPIMPL_H
|
||||
|
||||
/*
 * Code point value categories for the StringPrep/IDNA implementation.
 * No trailing comma after the last enumerator: C89 and C++98 reject it.
 * NOTE(review): where these values are stored and read is not visible in
 * this header - confirm against strprep.cpp.
 */
enum{
    UIDNA_NO_VALUE          = 0x0000,
    UIDNA_UNASSIGNED        = 0x0001,
    UIDNA_PROHIBITED        = 0x0002,
    UIDNA_MAP_NFKC          = 0x0003,
    UIDNA_LABEL_SEPARATOR   = 0x0004
};

enum{
    _IDNA_LENGTH_IN_MAPPING_TABLE = 0x0003 /*11*/
};

/* indexes[] value names */
enum {
    _IDNA_INDEX_TRIE_SIZE,             /* number of bytes in normalization trie */
    _IDNA_INDEX_MAPPING_DATA_SIZE,     /* The array that contains the mapping   */
    _IDNA_INDEX_TOP=3                  /* changing this requires a new formatVersion */
};

enum {
    _IDNA_MAPPING_DATA_SIZE = 1700,
    _IDNA_MAP_TO_NOTHING    = 0xFFF
};
|
||||
|
||||
U_CFUNC UBool U_EXPORT2
|
||||
ustrprep_cleanup();
|
||||
|
||||
/* error codes for prototyping
|
||||
#define U_IDNA_ERROR_START U_ERROR_LIMIT
|
||||
#define U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 1))
|
||||
#define U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 2))
|
||||
#define U_IDNA_CHECK_BIDI_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 3))
|
||||
#define U_IDNA_STD3_ASCII_RULES_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 4))
|
||||
#define U_IDNA_ACE_PREFIX_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 5))
|
||||
#define U_IDNA_VERIFICATION_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 6))
|
||||
#define U_IDNA_LABEL_TOO_LONG_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 8))
|
||||
*/
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
||||
|
530
icu4c/source/common/strprep.cpp
Normal file
530
icu4c/source/common/strprep.cpp
Normal file
|
@ -0,0 +1,530 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: strprep.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003feb1
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
|
||||
#include "strprep.h"
|
||||
#include "utrie.h"
|
||||
#include "umutex.h"
|
||||
#include "cmemory.h"
|
||||
#include "sprpimpl.h"
|
||||
#include "nameprep.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "unormimp.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/ustring.h"
|
||||
|
||||
/* Start of the mapping table inside the loaded data; set by loadData(). */
static const uint16_t* mappingData = NULL;
|
||||
/* Copy of the index array found at the start of the loaded data. */
static int32_t indexes[_IDNA_INDEX_TOP]={ 0 };
|
||||
/* TRUE once loadData() has completed successfully. */
static UBool _isDataLoaded = FALSE;
|
||||
/* Trie unserialized from the loaded data; looked up per code point with UTRIE_GET16. */
static UTrie idnTrie={ 0,0,0,0,0,0,0 };
|
||||
/* Handle of the opened data item; closed by ustrprep_cleanup(). */
static UDataMemory* idnData=NULL;
|
||||
/* Result of the most recent load attempt; reported back by StringPrep::isDataLoaded(). */
static UErrorCode dataErrorCode =U_ZERO_ERROR;
|
||||
/* file definitions */
|
||||
/* Name and type passed to udata_openChoice() in loadData(). */
static const char* DATA_NAME = "uidna";
|
||||
static const char* DATA_TYPE = "icu";
|
||||
|
||||
/**
 * Releases the IDNA data and resets every piece of cached state derived
 * from it, so that a later loadData() starts from scratch.
 *
 * mappingData, idnTrie and indexes all refer to (or were copied from) the
 * memory owned by idnData; they are cleared together with it so that no
 * pointer into the closed data item survives.
 *
 * @return always TRUE
 */
U_CFUNC UBool U_EXPORT2
ustrprep_cleanup() {
    if(idnData!=NULL) {
        udata_close(idnData);
        idnData=NULL;
    }
    /* these pointed into the data that has just been closed */
    mappingData=NULL;
    uprv_memset(&idnTrie, 0, sizeof(idnTrie));
    uprv_memset(indexes, 0, sizeof(indexes));

    dataErrorCode=U_ZERO_ERROR;
    _isDataLoaded=FALSE;

    return TRUE;
}
|
||||
|
||||
/**
 * udata_openChoice() callback: accepts a data item only if its header
 * matches this platform (endianness, charset family), carries the data
 * format "IDNA", has format version 2 and was built with the trie shift
 * values this code was compiled with.
 *
 * @param pInfo header of the candidate data item
 * @return TRUE if the item can be used, FALSE otherwise
 */
static UBool U_CALLCONV
isAcceptable(void * /* context */,
             const char * /* type */,
             const char * /* name */,
             const UDataInfo *pInfo) {
    const UBool platformMatches = (UBool)(
        pInfo->size>=20 &&
        pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
        pInfo->charsetFamily==U_CHARSET_FAMILY);

    /* dataFormat="IDNA" 0x49, 0x44, 0x4e, 0x41 */
    const UBool isIdnaFormat = (UBool)(
        pInfo->dataFormat[0]==0x49 &&
        pInfo->dataFormat[1]==0x44 &&
        pInfo->dataFormat[2]==0x4e &&
        pInfo->dataFormat[3]==0x41);

    const UBool versionMatches = (UBool)(
        pInfo->formatVersion[0]==2 &&
        pInfo->formatVersion[2]==UTRIE_SHIFT &&
        pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT);

    return (UBool)(platformMatches && isIdnaFormat && versionMatches);
}
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
getFoldingOffset(uint32_t data) {
|
||||
if(data&0x8000) {
|
||||
return (int32_t)(data&0x7fff);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Loads the "uidna.icu" data item on first use and publishes it in the
 * file-static variables (idnData, indexes, idnTrie, mappingData).
 *
 * The data is opened and the trie unserialized outside the global mutex;
 * only the hand-over to the statics happens while the mutex is held.  If
 * another thread installed its copy first, this thread's copy is closed.
 *
 * @param errorCode in/out error code; nothing is done if it already
 *                  indicates a failure.  The outcome of the open call is
 *                  also recorded in dataErrorCode.
 * @return TRUE if the data is available, FALSE otherwise
 */
static UBool U_CALLCONV
loadData(UErrorCode &errorCode) {
    /* already loaded: nothing to do */
    if(_isDataLoaded!=FALSE) {
        return _isDataLoaded;
    }

    /* errorCode is a reference, so only its value needs checking */
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }

    UTrie _idnTrie={ 0,0,0,0,0,0,0 };
    UDataMemory *data;
    const int32_t *p=NULL;
    const uint8_t *pb;

    /* open the data outside the mutex block */
    //TODO: change the path
    data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errorCode);
    dataErrorCode=errorCode;
    if(U_FAILURE(errorCode)) {
        return _isDataLoaded=FALSE;
    }

    /* layout: index array, then the serialized trie, then the mapping table */
    p=(const int32_t *)udata_getMemory(data);
    pb=(const uint8_t *)(p+_IDNA_INDEX_TOP);
    utrie_unserialize(&_idnTrie, pb, p[_IDNA_INDEX_TRIE_SIZE], &errorCode);
    _idnTrie.getFoldingOffset=getFoldingOffset;

    if(U_FAILURE(errorCode)) {
        dataErrorCode=errorCode;
        udata_close(data);
        return _isDataLoaded=FALSE;
    }

    /* in the mutex block, set the data for this process */
    umtx_lock(NULL);
    if(idnData==NULL) {
        idnData=data;
        data=NULL;
        uprv_memcpy(&indexes, p, sizeof(indexes));
        uprv_memcpy(&idnTrie, &_idnTrie, sizeof(UTrie));
    } else {
        /* another thread won the race: use the data it installed */
        p=(const int32_t *)udata_getMemory(idnData);
    }
    umtx_unlock(NULL);

    /* initialize some variables */
    mappingData=(uint16_t *)((uint8_t *)(p+_IDNA_INDEX_TOP)+indexes[_IDNA_INDEX_TRIE_SIZE]);

    _isDataLoaded = TRUE;

    /* if a different thread set it first, then close the extra data */
    if(data!=NULL) {
        udata_close(data); /* NULL if it was set correctly */
    }

    return _isDataLoaded;
}
|
||||
|
||||
|
||||
static inline
|
||||
void syntaxError(const UChar* rules,
|
||||
int32_t pos,
|
||||
int32_t rulesLen,
|
||||
UParseError* parseError) {
|
||||
|
||||
if(parseError == NULL){
|
||||
return;
|
||||
}
|
||||
if(pos == rulesLen && rulesLen >0){
|
||||
pos--;
|
||||
}
|
||||
parseError->offset = pos;
|
||||
parseError->line = 0 ; // we are not using line numbers
|
||||
|
||||
// for pre-context
|
||||
int32_t start = (pos <=U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
|
||||
int32_t stop = pos;
|
||||
|
||||
u_memcpy(parseError->preContext,rules+start,stop-start);
|
||||
//null terminate the buffer
|
||||
parseError->preContext[stop-start] = 0;
|
||||
|
||||
//for post-context
|
||||
start = pos+1;
|
||||
stop = ((pos+U_PARSE_CONTEXT_LEN)<= rulesLen )? (pos+(U_PARSE_CONTEXT_LEN-1)) :
|
||||
rulesLen;
|
||||
|
||||
u_memcpy(parseError->postContext,rules+start,stop-start);
|
||||
//null terminate the buffer
|
||||
parseError->postContext[stop-start]= 0;
|
||||
|
||||
}
|
||||
|
||||
// *****************************************************************************
|
||||
// class StringPrep
|
||||
// *****************************************************************************
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/* Definition of the class-ID variable; only its address is used (see getStaticClassID() in strprep.h). */
const char StringPrep::fgClassID=0;
|
||||
|
||||
/**
 * Makes sure the IDNA data is loaded, loading it on first use.
 *
 * A failed load is remembered in dataErrorCode and is not retried; the
 * remembered error is handed back through status.
 *
 * @param status in/out error code; receives the load error on failure
 * @return TRUE if the data is available, FALSE otherwise
 */
UBool StringPrep::isDataLoaded(UErrorCode& status){
    if(U_FAILURE(status)){
        return FALSE;
    }

    /* an earlier attempt already failed: report that error without trying again */
    const UBool earlierAttemptFailed = (UBool)(_isDataLoaded==FALSE && U_FAILURE(dataErrorCode));
    if(!earlierAttemptFailed){
        loadData(dataErrorCode);
    }

    if(U_FAILURE(dataErrorCode)){
        status = dataErrorCode;
        return FALSE;
    }
    return TRUE;
}
|
||||
|
||||
|
||||
/**
 * Creates a StringPrep object for the default profile.
 *
 * The data is loaded (and status checked) before anything is allocated, and
 * an allocation failure is reported instead of silently returning NULL with
 * a success status.
 *
 * @param status in/out error code; set to the data-loading error or to
 *               U_MEMORY_ALLOCATION_ERROR on failure
 * @return the new object, owned by the caller, or NULL on failure
 */
StringPrep* StringPrep::createDefaultInstance(UErrorCode& status){
    if(!isDataLoaded(status)){
        return NULL;
    }
    StringPrep* strprep = new StringPrep();
    /* assumes the class's operator new reports failure by returning NULL -- TODO confirm against UObject */
    if(strprep == NULL){
        status = U_MEMORY_ALLOCATION_ERROR;
    }
    return strprep;
}
|
||||
|
||||
/**
 * Creates a StringPrep object for the Nameprep profile.
 *
 * The data is loaded (and status checked) before anything is allocated, and
 * an allocation failure is reported instead of silently returning NULL with
 * a success status.
 *
 * @param status in/out error code; set to the data-loading error, to an
 *               error raised by the NamePrep constructor, or to
 *               U_MEMORY_ALLOCATION_ERROR on failure
 * @return the new object, owned by the caller, or NULL on failure
 */
StringPrep* StringPrep::createNameprepInstance(UErrorCode& status){
    if(!isDataLoaded(status)){
        return NULL;
    }
    StringPrep* strprep = new NamePrep(status);
    /* assumes the class's operator new reports failure by returning NULL -- TODO confirm against UObject */
    if(strprep == NULL){
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    /* the constructor itself may have failed */
    if(U_FAILURE(status)){
        delete strprep;
        return NULL;
    }
    return strprep;
}
|
||||
|
||||
/**
 * Hook for profiles that want to exempt code points from the prohibited
 * set.  The default profile exempts nothing.
 *
 * @return always FALSE
 */
UBool StringPrep::isNotProhibited(UChar32 /* ch */){
    return FALSE;
}
|
||||
/**
 * Tells whether the data marks a code point as unassigned.
 *
 * The kind of a trie word lives in its low 3 bits (see getValues()); only
 * those bits are compared, exactly as map() and isLabelSeparator() do, so
 * that any length/index bits in the word cannot affect the answer.
 * The data must already be loaded.
 *
 * @param ch code point to test
 * @return TRUE if ch is flagged UIDNA_UNASSIGNED
 */
UBool StringPrep::isUnassigned(UChar32 ch){
    uint32_t result;
    UTRIE_GET16(&idnTrie,ch,result);
    return (UBool)((result & 0x07) == UIDNA_UNASSIGNED);
}
|
||||
|
||||
|
||||
/**
 * Splits a trie data word into its three fields.
 *
 * @param result the trie data word
 * @param flag   receives bits 0..2
 * @param length receives bits 3..4
 * @param index  receives all remaining bits (bit 5 and above)
 */
static inline void getValues(uint32_t result, int8_t& flag,
                             int8_t& length, int32_t& index){
    const uint32_t flagMask   = 0x07;   /* lowest 3 bits */
    const uint32_t lengthMask = 0x03;   /* 2 bits, after shifting out the flag */

    flag   = (int8_t) (result & flagMask);
    length = (int8_t) ((result>>3) & lengthMask);
    index  = (result>> 5);
}
|
||||
|
||||
|
||||
/**
 * Writes code point c as UTF-16 to dest at destIndex, but only if it fits
 * completely below destCapacity.
 *
 * @return destIndex advanced by the number of code units c needs, whether
 *         or not anything was written (used for pre-flighting)
 */
static inline int32_t
strprepAppendCodePoint(UChar* dest, int32_t destIndex, int32_t destCapacity, UChar32 c){
    if(c <= 0xFFFF){
        if(destIndex < destCapacity ){
            dest[destIndex] = (UChar)c;
        }
        return destIndex+1;
    }
    if(destIndex+1 < destCapacity ){
        dest[destIndex]   = U16_LEAD(c);
        dest[destIndex+1] = U16_TRAIL(c);
    }
    return destIndex+2;
}

/**
 * Mapping step: copies src to dest, replacing every code point that has a
 * mapping in the data with its mapping (which may be empty).
 *
 * Unassigned code points are copied through when allowUnassigned is TRUE;
 * otherwise the function stops with U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR
 * and describes the position in parseError.  The data must already be
 * loaded.
 *
 * @param src             source text
 * @param srcLength       length of src, or -1 if NUL-terminated
 * @param dest            destination buffer, may be NULL if destCapacity is 0
 * @param destCapacity    capacity of dest in UChars
 * @param allowUnassigned whether unassigned code points are accepted
 * @param parseError      receives error position and context, may be NULL
 * @param status          in/out error code
 * @return the length of the mapped text (also when dest is too small),
 *         or 0 on error
 */
int32_t StringPrep::map(const UChar* src, int32_t srcLength,
                        UChar* dest, int32_t destCapacity,
                        UBool allowUnassigned,
                        UParseError* parseError,
                        UErrorCode& status ){
    // check error status
    if(U_FAILURE(status)){
        return 0;
    }

    //check arguments
    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(srcLength == -1){
        srcLength = u_strlen(src);
    }

    uint32_t trieWord;
    int8_t   kind;
    int8_t   mappedLength;
    int32_t  mappingIndex;
    int32_t  written = 0;
    int32_t  readPos = 0;

    while(readPos < srcLength){
        UChar32 ch;
        U16_NEXT(src,readPos,srcLength,ch);

        UTRIE_GET16(&idnTrie,ch,trieWord);
        getValues(trieWord,kind,mappedLength,mappingIndex);

        // unassigned code points are either passed through or rejected
        if(kind == UIDNA_UNASSIGNED){
            if(allowUnassigned != TRUE){
                syntaxError(src, readPos-1, srcLength,parseError);
                status = U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR;
                return 0;
            }
            written = strprepAppendCodePoint(dest, written, destCapacity, ch);
            continue;
        }

        const UBool hasMapping = (UBool)(
            (kind == UIDNA_MAP_NFKC && doNFKC == TRUE) ||
            (mappingIndex == _IDNA_MAP_TO_NOTHING && doNFKC == FALSE));

        if(!hasMapping){
            // no mapping: the code point goes to the output unchanged
            written = strprepAppendCodePoint(dest, written, destCapacity, ch);
            continue;
        }

        // long mappings keep their length in the first unit of the table entry
        if(mappedLength == _IDNA_LENGTH_IN_MAPPING_TABLE){
            mappedLength = (int8_t) mappingData[mappingIndex++];
        }

        for(int8_t unit = 0; unit < mappedLength; unit++){
            if(written < destCapacity ){
                dest[written] = mappingData[mappingIndex+unit];
            }
            written++; /* for pre-flighting */
        }
    }

    return u_terminateUChars(dest, destCapacity, written, &status);
}
|
||||
|
||||
|
||||
/**
 * Normalization step: NFKC-normalizes src into dest using the
 * Unicode 3.2 option of unorm_normalize().
 *
 * @param src          source text
 * @param srcLength    length of src
 * @param dest         destination buffer
 * @param destCapacity capacity of dest in UChars
 * @param status       in/out error code
 * @return the value returned by unorm_normalize()
 */
int32_t StringPrep::normalize(  const UChar* src, int32_t srcLength,
                                UChar* dest, int32_t destCapacity,
                                UErrorCode& status ){
    const int32_t normalizedLength =
        unorm_normalize(src,srcLength,UNORM_NFKC,UNORM_UNICODE_3_2,dest,destCapacity,&status);
    return normalizedLength;
}
|
||||
|
||||
|
||||
/*
|
||||
1) Map -- For each character in the input, check if it has a mapping
|
||||
and, if so, replace it with its mapping.
|
||||
|
||||
2) Normalize -- Possibly normalize the result of step 1 using Unicode
|
||||
normalization.
|
||||
|
||||
3) Prohibit -- Check for any characters that are not allowed in the
|
||||
output. If any are found, return an error.
|
||||
|
||||
4) Check bidi -- Possibly check for right-to-left characters, and if
|
||||
any are found, make sure that the whole string satisfies the
|
||||
requirements for bidirectional strings. If the string does not
|
||||
satisfy the requirements for bidirectional strings, return an
|
||||
error.
|
||||
[Unicode3.2] defines several bidirectional categories; each character
|
||||
has one bidirectional category assigned to it. For the purposes of
|
||||
the requirements below, an "RandALCat character" is a character that
|
||||
has Unicode bidirectional categories "R" or "AL"; an "LCat character"
|
||||
is a character that has Unicode bidirectional category "L". Note
|
||||
|
||||
|
||||
that there are many characters which fall in neither of the above
|
||||
definitions; Latin digits (<U+0030> through <U+0039>) are examples of
|
||||
this because they have bidirectional category "EN".
|
||||
|
||||
In any profile that specifies bidirectional character handling, all
|
||||
three of the following requirements MUST be met:
|
||||
|
||||
1) The characters in section 5.8 MUST be prohibited.
|
||||
|
||||
2) If a string contains any RandALCat character, the string MUST NOT
|
||||
contain any LCat character.
|
||||
|
||||
3) If a string contains any RandALCat character, a RandALCat
|
||||
character MUST be the first character of the string, and a
|
||||
RandALCat character MUST be the last character of the string.
|
||||
*/
|
||||
|
||||
#define MAX_STACK_BUFFER_SIZE 300
|
||||
|
||||
int32_t StringPrep::process(const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
UBool allowUnassigned,
|
||||
UParseError* parseError,
|
||||
UErrorCode& status ){
|
||||
// check error status
|
||||
if(U_FAILURE(status)){
|
||||
return 0;
|
||||
}
|
||||
|
||||
//check arguments
|
||||
if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
|
||||
status=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
|
||||
UChar *b1 = b1Stack, *b2 = b2Stack;
|
||||
int32_t b1Len, b2Len,
|
||||
b1Capacity = MAX_STACK_BUFFER_SIZE ,
|
||||
b2Capacity = MAX_STACK_BUFFER_SIZE;
|
||||
uint32_t result;
|
||||
int32_t b2Index = 0;
|
||||
int8_t flag;
|
||||
int8_t length;
|
||||
int32_t index;
|
||||
UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
|
||||
UBool leftToRight=FALSE, rightToLeft=FALSE;
|
||||
int32_t rtlPos =-1, ltrPos =-1;
|
||||
|
||||
b1Len = map(src,srcLength, b1, b1Capacity,allowUnassigned, parseError, status);
|
||||
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR){
|
||||
// redo processing of string
|
||||
/* we do not have enough room so grow the buffer*/
|
||||
b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
|
||||
if(b1==NULL){
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR; // reset error
|
||||
|
||||
b1Len = map(src,srcLength, b1, b1Len,allowUnassigned, parseError, status);
|
||||
|
||||
}
|
||||
|
||||
b2Len = normalize(b1,b1Len, b2,b2Capacity,status);
|
||||
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR){
|
||||
// redo processing of string
|
||||
/* we do not have enough room so grow the buffer*/
|
||||
b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
|
||||
if(b2==NULL){
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR; // reset error
|
||||
|
||||
b2Len = normalize(b2,b2Len, b2,b2Len,status);
|
||||
|
||||
}
|
||||
|
||||
if(U_FAILURE(status)){
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
UChar32 ch;
|
||||
|
||||
for(; b2Index<b2Len;){
|
||||
|
||||
ch = 0;
|
||||
|
||||
U16_NEXT(b2, b2Index, b2Len, ch);
|
||||
|
||||
UTRIE_GET16(&idnTrie,ch,result);
|
||||
|
||||
getValues(result,flag,length,index);
|
||||
|
||||
if(flag == UIDNA_PROHIBITED
|
||||
&& isNotProhibited(ch) == FALSE){
|
||||
status = U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR;
|
||||
syntaxError(b1,b2Index-1,b2Len, parseError);
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
direction = u_charDirection(ch);
|
||||
if(firstCharDir == U_CHAR_DIRECTION_COUNT){
|
||||
firstCharDir = direction;
|
||||
}
|
||||
if(direction == U_LEFT_TO_RIGHT){
|
||||
leftToRight = TRUE;
|
||||
ltrPos = b2Index-1;
|
||||
}
|
||||
if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
|
||||
rightToLeft = TRUE;
|
||||
rtlPos = b2Index-1;
|
||||
}
|
||||
}
|
||||
|
||||
// satisfy 2
|
||||
if( leftToRight == TRUE && rightToLeft == TRUE){
|
||||
status = U_IDNA_CHECK_BIDI_ERROR;
|
||||
syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
//satisfy 3
|
||||
if(rightToLeft == TRUE && firstCharDir != direction ){
|
||||
status = U_IDNA_CHECK_BIDI_ERROR;
|
||||
syntaxError(b2,b2Index-1,b2Len,parseError);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if(b2Len <= destCapacity){
|
||||
uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
|
||||
}
|
||||
|
||||
CLEANUP:
|
||||
if(b1!=b1Stack){
|
||||
uprv_free(b1);
|
||||
}
|
||||
if(b2!=b2Stack){
|
||||
uprv_free(b2);
|
||||
}
|
||||
return u_terminateUChars(dest, destCapacity, b2Len, &status);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Tells whether the data marks a code point as a label separator.
 * Loads the data first if necessary.
 *
 * @param ch     code point to test
 * @param status in/out error code; receives the data-loading error, if any
 * @return TRUE if the low 3 bits of the trie word for ch equal
 *         UIDNA_LABEL_SEPARATOR; FALSE otherwise or on error
 */
UBool StringPrep::isLabelSeparator(UChar32 ch, UErrorCode& status){
    // nothing can be looked up on error or without the data
    if(U_FAILURE(status) || !isDataLoaded(status)){
        return FALSE;
    }

    int32_t trieWord;
    UTRIE_GET16(&idnTrie,ch, trieWord);
    return (UBool)((trieWord & 0x07) == UIDNA_LABEL_SEPARATOR);
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
360
icu4c/source/common/strprep.h
Normal file
360
icu4c/source/common/strprep.h
Normal file
|
@ -0,0 +1,360 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: strprep.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003feb1
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
|
||||
#ifndef STRPREP_H
|
||||
#define STRPREP_H
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/parseerr.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**\file
|
||||
*
|
||||
* This API implements the RFC 3454 StringPrep standard.
|
||||
*
|
||||
* The steps for preparing strings are:
|
||||
*
|
||||
* 1) Map -- For each character in the input, check if it has a mapping
|
||||
* and, if so, replace it with its mapping.
|
||||
* <ul>
|
||||
* <li>Delete certain codepoints from the input because their
|
||||
* presence or absence in the protocol identifiers should not
|
||||
* make two strings different</li>
|
||||
* <li>Case Mappings
|
||||
* <br>If Normalization is turned off
|
||||
* <br> Get mappings from case map tables
|
||||
* <br>else
|
||||
* <br> Get mappings from case map tables for normalization
|
||||
* <br> Use u_getFC_NFKC_Closure for obtaining extra mappings
|
||||
* </li>
|
||||
* </ul>
|
||||
* 2) Normalize -- Possibly normalize the result of step 1 using Unicode
|
||||
* normalization NFKC.
|
||||
*
|
||||
* 3) Prohibit -- Check for any characters that are not allowed in the
|
||||
* output. If any are found, return an error.
|
||||
*
|
||||
* 4) Check bidi -- Possibly check for right-to-left characters, and if
|
||||
* any are found, make sure that the whole string satisfies the
|
||||
* requirements for bidirectional strings. If the string does not
|
||||
* satisfy the requirements for bidirectional strings, return an
|
||||
* error.
|
||||
*
|
||||
* Some StringPrep profiles:
|
||||
* IDN: "Nameprep" http://www.ietf.org/internet-drafts/draft-ietf-idn-nameprep-11.txt
|
||||
* XMPP Node Identifiers: "Nodeprep" http://www.ietf.org/internet-drafts/draft-ietf-xmpp-nodeprep-01.txt
|
||||
* XMPP Resource Identifiers: "Resourceprep" http://www.ietf.org/internet-drafts/draft-ietf-xmpp-resourceprep-01.txt
|
||||
* ANONYMOUS SASL tokens: "plain" http://www.ietf.org/internet-drafts/draft-ietf-sasl-anon-00.txt
|
||||
* iSCSI http://www.ietf.org/internet-drafts/draft-ietf-ips-iscsi-string-prep-03.txt
|
||||
*/
|
||||
class StringPrep : public UObject{
|
||||
|
||||
protected:
|
||||
UVersionInfo unicodeVersion; /** The Character repertoire version of this profile */
|
||||
UBool bidiCheck; /** Option to turn BiDi checking on */
|
||||
UBool doNFKC; /** Option to turn NFKC on */
|
||||
|
||||
/**
|
||||
* Protected default constructor sub classes
|
||||
*/
|
||||
StringPrep(){};
|
||||
|
||||
public:
|
||||
/**
|
||||
* Destructor
|
||||
*/
|
||||
virtual inline ~StringPrep(){};
|
||||
|
||||
/**
|
||||
* Map every character in input stream with mapping character
|
||||
* in the mapping table and populate the output stream.
|
||||
* For any individual character the mapping table may specify
|
||||
* that that a character be mapped to nothing, mapped to one
|
||||
* other character or to a string of other characters.
|
||||
*
|
||||
* @param src Pointer to UChar buffer containing a single label
|
||||
* @param srcLength Number of characters in the source label
|
||||
* @param dest Pointer to the destination buffer to receive the output
|
||||
* @param destCapacity The capacity of destination array
|
||||
* @param allowUnassigned Unassigned values can be converted to ASCII for query operations
|
||||
* If TRUE unassigned values are treated as normal Unicode code point.
|
||||
* If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
|
||||
* @param status ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return The number of UChars in the destination buffer
|
||||
*
|
||||
*/
|
||||
virtual int32_t map(const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
UBool allowUnassigned,
|
||||
UParseError* parseError,
|
||||
UErrorCode& status );
|
||||
|
||||
/**
|
||||
* Normalize the input stream using Normalization Form KC (NFKC)
|
||||
*
|
||||
* @param src Pointer to UChar buffer containing a single label
|
||||
* @param srcLength Number of characters in the source label
|
||||
* @param dest Pointer to the destination buffer to receive the output
|
||||
* @param destCapacity The capacity of destination array
|
||||
* @param status ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return The number of UChars in the destination buffer
|
||||
*
|
||||
*
|
||||
*/
|
||||
virtual int32_t normalize( const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
UErrorCode& status );
|
||||
|
||||
|
||||
/**
|
||||
* Prepare the input stream with for use. This operation maps, normalizes(NFKC),
|
||||
* checks for prohited and BiDi characters in the order defined by RFC 3454
|
||||
*
|
||||
* @param src Pointer to UChar buffer containing a single label
|
||||
* @param srcLength Number of characters in the source label
|
||||
* @param dest Pointer to the destination buffer to receive the output
|
||||
* @param destCapacity The capacity of destination array
|
||||
* @param allowUnassigned Unassigned values can be converted to ASCII for query operations
|
||||
* If TRUE unassigned values are treated as normal Unicode code point.
|
||||
* If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code.
|
||||
* @param status ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return The number of UChars in the destination buffer
|
||||
*
|
||||
*
|
||||
*/
|
||||
virtual int32_t process(const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
UBool allowUnassigned,
|
||||
UParseError* parseError,
|
||||
UErrorCode& status );
|
||||
|
||||
/**
|
||||
* Create a profile from prebuilt default Nameprep profile conforming to
|
||||
* nameprep internet draft (http://www.ietf.org/html.charters/idn-charter.html).
|
||||
* This is a built-in/unmodifiable profile.
|
||||
*
|
||||
* @param status ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return Pointer to StringPrep object that is created. Should be deleted by
|
||||
* by caller
|
||||
*
|
||||
*
|
||||
*/
|
||||
static StringPrep* createNameprepInstance(UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Create a profile from prebuilt default StringPrep profile conforming to
|
||||
* RFC 3454 (ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt).
|
||||
* User defined profiles can be created by getting the default profile and
|
||||
* adding mappings, removing mappings, turning options ON/OFF and prohibiting
|
||||
* characters from the output.
|
||||
*
|
||||
* @param status ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return Pointer to StringPrep object that is created. Should be deleted by
|
||||
* the caller.
|
||||
*
|
||||
*
|
||||
*/
|
||||
static StringPrep* createDefaultInstance(UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Ascertain if the given code point is a Letter/Digit/Hyphen in the ASCII range
|
||||
*
|
||||
* @return TRUE is the code point is a Letter/Digit/Hyphen
|
||||
*
|
||||
*
|
||||
*/
|
||||
static inline UBool isLDHChar(UChar32 ch);
|
||||
|
||||
/**
|
||||
* Ascertain if the given code point is a label separator as specified by IDNA
|
||||
*
|
||||
* @return TRUE is the code point is a label separator
|
||||
*
|
||||
*
|
||||
*/
|
||||
virtual UBool isLabelSeparator(UChar32 ch, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Get the BiDi option of this profile
|
||||
*
|
||||
*
|
||||
*/
|
||||
inline UBool getCheckBiDi();
|
||||
|
||||
/**
|
||||
* Get the normalization (NFKC) option of this profile
|
||||
*
|
||||
* @return The normalization option
|
||||
*
|
||||
*
|
||||
*/
|
||||
inline UBool getNormalization();
|
||||
|
||||
/**
|
||||
* Get the Unicode version which this profile
|
||||
* conforms to
|
||||
*
|
||||
*
|
||||
*/
|
||||
inline void getUnicodeVersion(UVersionInfo& info);
|
||||
|
||||
private:
|
||||
// Boiler plate
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
*
|
||||
*/
|
||||
StringPrep(const StringPrep&);
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
*
|
||||
*/
|
||||
StringPrep& operator=(const StringPrep&);
|
||||
|
||||
/**
|
||||
* Return true if another object is semantically equal to this one.
|
||||
*
|
||||
* @param other the object to be compared with.
|
||||
* @return true if another object is semantically equal to this one.
|
||||
*
|
||||
*/
|
||||
UBool operator==(const StringPrep& other) const {return FALSE;};
|
||||
|
||||
/**
|
||||
* Return true if another object is semantically unequal to this one.
|
||||
*
|
||||
* @param other the object to be compared with.
|
||||
* @return true if another object is semantically unequal to this one.
|
||||
*
|
||||
*/
|
||||
UBool operator!=(const StringPrep& other) const { return !operator==(other); }
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for this class.
|
||||
*
|
||||
*
|
||||
*/
|
||||
static inline UClassID getStaticClassID();
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for the actual class.
|
||||
*
|
||||
*
|
||||
*/
|
||||
virtual inline UClassID getDynamicClassID() const;
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* Sub classes that slightly modify the default profile
|
||||
* implement this method to remove characters to
|
||||
* the prohibited list. The default implementation does not
|
||||
* check if the data is loaded or not. The caller is responsible
|
||||
* for checking for data.
|
||||
*
|
||||
*/
|
||||
virtual UBool isNotProhibited(UChar32 ch);
|
||||
|
||||
/**
|
||||
* Sub classes that slightly modify the default profile
|
||||
* implement this method to remove characters to
|
||||
* the unassigned list. The default implementation does not
|
||||
* check if the data is loaded or not. The caller is responsible
|
||||
* for checking for data.
|
||||
*/
|
||||
virtual UBool isUnassigned(UChar32 ch);
|
||||
|
||||
/**
|
||||
* Ascertains if uidna.icu data file is loaded.
|
||||
* If data is not loaded, loads the data file.
|
||||
*
|
||||
*
|
||||
*/
|
||||
static UBool isDataLoaded(UErrorCode& status);
|
||||
|
||||
private:
|
||||
|
||||
/**
|
||||
* The address of this static class variable serves as this class's ID
|
||||
* for ICU "poor man's RTTI".
|
||||
*/
|
||||
static const char fgClassID;
|
||||
|
||||
};
|
||||
|
||||
/* Returns the value of the bidiCheck member. */
inline UBool StringPrep::getCheckBiDi(){
|
||||
return bidiCheck;
|
||||
}
|
||||
|
||||
|
||||
/* Returns the value of the doNFKC member. */
inline UBool StringPrep::getNormalization(){
|
||||
return doNFKC;
|
||||
}
|
||||
|
||||
/* Copies the unicodeVersion member, field by field, into info. */
inline void StringPrep::getUnicodeVersion(UVersionInfo& info){
    const int32_t fieldCount = (int32_t)(sizeof(info)/sizeof(info[0]));
    int32_t field = 0;
    while(field < fieldCount){
        info[field] = unicodeVersion[field];
        ++field;
    }
}
|
||||
|
||||
/* The class ID is the address of the static member fgClassID. */
inline UClassID StringPrep::getStaticClassID() {
|
||||
return (UClassID)&fgClassID;
|
||||
}
|
||||
|
||||
/* Returns the same ID as getStaticClassID() for objects of this class. */
inline UClassID StringPrep::getDynamicClassID() const {
|
||||
return getStaticClassID();
|
||||
}
|
||||
|
||||
/*
 * TRUE for the ASCII letters, digits and the hyphen:
 * [\u002D \u0030-\u0039 \u0041-\u005A \u0061-\u007A]
 */
inline UBool StringPrep::isLDHChar(UChar32 ch){
    // everything outside '-'..'z' is rejected at once
    if(ch < 0x002D || ch > 0x007A){
        return FALSE;
    }
    if(ch == 0x002D){               // hyphen
        return TRUE;
    }
    if(ch <= 0x0039){               // up to '9': only digits qualify
        return (UBool)(ch >= 0x0030);
    }
    if(ch <= 0x005A){               // up to 'Z': only capital letters qualify
        return (UBool)(ch >= 0x0041);
    }
    return (UBool)(ch >= 0x0061);   // up to 'z': only small letters qualify
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
||||
|
735
icu4c/source/common/uidna.cpp
Normal file
735
icu4c/source/common/uidna.cpp
Normal file
|
@ -0,0 +1,735 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: uidna.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003feb1
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
#include "unicode/uidna.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "strprep.h"
|
||||
#include "punycode.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "cmemory.h"
|
||||
#include "sprpimpl.h"
|
||||
|
||||
/* The official IDNA ACE prefix is "xn--" (stored here as UTF-16 code units). */
|
||||
static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
|
||||
/* Number of code units in ACE_PREFIX. */
#define ACE_PREFIX_LENGTH 4
|
||||
|
||||
/* presumably the maximum length of a single label -- not used in the code visible here; TODO confirm */
#define MAX_LABEL_LENGTH 63
|
||||
/* U+002D HYPHEN-MINUS */
#define HYPHEN 0x002D
|
||||
/* Sizes (in UChars) of the stack buffers; MAX_LABEL_BUFFER_SIZE is larger than MAX_LABEL_LENGTH, leaving headroom. */
|
||||
#define MAX_LABEL_BUFFER_SIZE 100
|
||||
#define MAX_IDN_BUFFER_SIZE 300
|
||||
|
||||
/* 'A' and 'Z': bounds of the ASCII capital letters, used by toASCIILower(). */
#define CAPITAL_A 0x0041
|
||||
#define CAPITAL_Z 0x005A
|
||||
/* Distance from an ASCII capital letter to its small letter. */
#define LOWER_CASE_DELTA 0x0020
|
||||
/* U+002E FULL STOP */
#define FULL_STOP 0x002E
|
||||
|
||||
/**
 * Tells whether a label starts with the ACE prefix, comparing through
 * u_tolower().
 *
 * @param src       the label
 * @param srcLength number of code units in src
 * @return TRUE if the first ACE_PREFIX_LENGTH code units of src lower-case
 *         to ACE_PREFIX, FALSE otherwise (including labels that are too short)
 */
inline static UBool
startsWithPrefix(const UChar* src , int32_t srcLength){
    if(srcLength < ACE_PREFIX_LENGTH){
        return FALSE;
    }

    for(int32_t pos = 0; pos < ACE_PREFIX_LENGTH; ++pos){
        if(u_tolower(src[pos]) != ACE_PREFIX[pos]){
            return FALSE;       // first mismatch decides
        }
    }
    return TRUE;
}
|
||||
|
||||
/**
 * Lower-cases ASCII capital letters only.
 *
 * @param ch code unit to convert
 * @return ch + LOWER_CASE_DELTA for 'A'..'Z', ch itself for everything else
 */
inline static UChar
toASCIILower(UChar ch){
    const UBool isCapital = (UBool)(CAPITAL_A <= ch && ch <= CAPITAL_Z);
    return isCapital ? (UChar)(ch + LOWER_CASE_DELTA) : ch;
}
|
||||
|
||||
/**
 * Compares two strings, ignoring the case of ASCII letters.
 *
 * The result is ordered like strcmp(): the strings are compared code unit
 * by code unit over their common length; if that part is equal, the
 * shorter string sorts first.  (A length mismatch still yields a non-zero
 * result, so callers that only test for equality are unaffected.)
 *
 * @param s1    first string
 * @param s1Len number of code units in s1
 * @param s2    second string
 * @param s2Len number of code units in s2
 * @return 0 if the strings are equal, a negative value if s1 sorts before
 *         s2, a positive value if s1 sorts after s2
 */
inline static int32_t
compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
                            const UChar* s2, int32_t s2Len){
    const int32_t commonLength = (s1Len < s2Len) ? s1Len : s2Len;

    for(int32_t i = 0; i < commonLength; i++) {
        const UChar c1 = s1[i];
        const UChar c2 = s2[i];

        /* Case-insensitive comparison */
        if(c1!=c2) {
            const int32_t rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2);
            if(rc!=0) {
                return rc;
            }
        }
    }

    /* the common part matches: the lengths decide */
    if(s1Len == s2Len){
        return 0;
    }
    return (s1Len < s2Len) ? -1 : 1;
}
|
||||
|
||||
static inline
|
||||
void syntaxError(const UChar* rules,
|
||||
int32_t pos,
|
||||
int32_t rulesLen,
|
||||
UParseError* parseError) {
|
||||
|
||||
if(parseError == NULL){
|
||||
return;
|
||||
}
|
||||
if(pos == rulesLen && rulesLen >0){
|
||||
pos--;
|
||||
}
|
||||
parseError->offset = pos;
|
||||
parseError->line = 0 ; // we are not using line numbers
|
||||
|
||||
// for pre-context
|
||||
int32_t start = (pos <=U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
|
||||
int32_t stop = pos;
|
||||
|
||||
u_memcpy(parseError->preContext,rules+start,stop-start);
|
||||
//null terminate the buffer
|
||||
parseError->preContext[stop-start] = 0;
|
||||
|
||||
//for post-context
|
||||
start = pos+1;
|
||||
stop = ((pos+U_PARSE_CONTEXT_LEN)<= rulesLen )? (pos+(U_PARSE_CONTEXT_LEN-1)) :
|
||||
rulesLen;
|
||||
|
||||
u_memcpy(parseError->postContext,rules+start,stop-start);
|
||||
//null terminate the buffer
|
||||
parseError->postContext[stop-start]= 0;
|
||||
|
||||
}
|
||||
|
||||
/**
 * ToASCII operation on a single label (see unicode/uidna.h for the public
 * contract).
 *
 * Steps: run Nameprep on the label, optionally enforce the STD3 ASCII rules,
 * and - if the prepared label contains non-ASCII code points - encode it with
 * Punycode and prepend the ACE prefix.
 *
 * @return The length of the result. When it does not fit into dest the
 *         required length is returned and u_terminateUChars() reports
 *         U_BUFFER_OVERFLOW_ERROR.
 */
U_CAPI int32_t U_EXPORT2
uidna_toASCII(const UChar* src, int32_t srcLength,
              UChar* dest, int32_t destCapacity,
              int32_t options,
              UParseError* parseError,
              UErrorCode* status){

    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }
    if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // NOTE: every local is declared before the first "goto CLEANUP" so that
    // no jump crosses an initialization.
    UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
    //initialize pointers to stack buffers
    UChar  *b1 = b1Stack, *b2 = b2Stack;
    int32_t b1Len, b2Len,
            b1Capacity = MAX_LABEL_BUFFER_SIZE,
            b2Capacity = MAX_LABEL_BUFFER_SIZE ,
            reqLength=0;

    UBool* caseFlags = NULL;

    // the source contains all ascii codepoints
    UBool srcIsASCII  = TRUE;
    // assume the source contains all LDH codepoints
    UBool srcIsLDH = TRUE;

    int32_t j=0;

    //get the options
    UBool allowUnassigned   = (UBool)((options & UIDNA_ALLOW_UNASSIGNED) != 0);
    UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);

    // index of the last non-LDH ASCII character seen, -1 if none
    int32_t failPos = -1;

    // step 2
    StringPrep* prep = StringPrep::createNameprepInstance(*status);

    if(U_FAILURE(*status)){
        goto CLEANUP;
    }

    b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned, parseError, *status);

    if(*status == U_BUFFER_OVERFLOW_ERROR){
        // redo processing of string
        // we do not have enough room so grow the buffer
        b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
        if(b1==NULL){
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }

        *status = U_ZERO_ERROR; // reset error

        b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
    }
    // error bail out
    if(U_FAILURE(*status)){
        goto CLEANUP;
    }

    // step 3 & 4
    for( j=0;j<b1Len;j++){
        if(b1[j] > 0x7F){
            srcIsASCII = FALSE;
        }else if(prep->isLDHChar(b1[j])==FALSE){
            // RFC 3490 step 3(a) only forbids non-LDH *ASCII* code points,
            // so non-ASCII code points must not trip the LDH check.
            // here we do not assemble surrogates
            // since we know that LDH code points
            // are in the ASCII range only
            srcIsLDH = FALSE;
            failPos = j;
        }
    }

    if(useSTD3ASCIIRules == TRUE){
        // verify 3a and 3b
        // b1Len > 0 guards the b1[b1Len-1] access for an empty label
        if( srcIsLDH == FALSE /* source contains some non-LDH characters */
            || (b1Len > 0 && (b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN))){
            *status = U_IDNA_STD3_ASCII_RULES_ERROR;

            /* populate the parseError struct */
            if(srcIsLDH==FALSE){
                // never hand a negative position to syntaxError()
                syntaxError(b1,(failPos > 0) ? (failPos-1) : 0,b1Len,parseError);
            }else if(b1[0] == HYPHEN){
                syntaxError(b1,0,b1Len,parseError);
            }else{
                syntaxError(b1,b1Len-1,b1Len,parseError);
            }

            goto CLEANUP;
        }
    }

    if(srcIsASCII){
        if(b1Len <= destCapacity){
            uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR);
            reqLength = b1Len;
        }else{
            // pre-flighting: report the length, u_terminateUChars sets the error
            reqLength = b1Len;
            goto CLEANUP;
        }
    }else{
        // step 5 : verify the sequence does not begin with ACE prefix
        if(!startsWithPrefix(b1,b1Len)){

            //step 6: encode the sequence with punycode
            caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
            if(caseFlags == NULL){
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto CLEANUP;
            }
            // The flags are read by the encoder, so they must not be left
            // uninitialized. FALSE presumably means "do not mark as
            // uppercase" - confirm against punycode.h.
            for(j=0;j<b1Len;j++){
                caseFlags[j] = FALSE;
            }

            b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status);

            if(*status == U_BUFFER_OVERFLOW_ERROR){
                // redo processing of string
                /* we do not have enough room so grow the buffer*/
                b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
                if(b2 == NULL){
                    *status = U_MEMORY_ALLOCATION_ERROR;
                    goto CLEANUP;
                }

                *status = U_ZERO_ERROR; // reset error

                b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status);
            }
            //error bail out
            if(U_FAILURE(*status)){
                goto CLEANUP;
            }
            reqLength = b2Len+ACE_PREFIX_LENGTH;

            if(reqLength > destCapacity){
                *status = U_BUFFER_OVERFLOW_ERROR;
                goto CLEANUP;
            }
            //Step 7: prepend the ACE prefix
            uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR);
            //Step 6: copy the contents in b2 into dest
            uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR);

        }else{
            *status = U_IDNA_ACE_PREFIX_ERROR;
            syntaxError(b1,0,b1Len,parseError);
            goto CLEANUP;
        }
    }

    // step 8: the resulting label must not exceed the maximum label length
    if(reqLength > MAX_LABEL_LENGTH){
        *status = U_IDNA_LABEL_TOO_LONG_ERROR;
    }

CLEANUP:
    if(b1 != b1Stack){
        uprv_free(b1);
    }
    if(b2 != b2Stack){
        uprv_free(b2);
    }
    uprv_free(caseFlags);

    delete prep;

    return u_terminateUChars(dest, destCapacity, reqLength, status);
}
|
||||
|
||||
|
||||
/**
 * ToUnicode operation on a single label (see unicode/uidna.h for the public
 * contract).
 *
 * Steps: run Nameprep when the label is not pure ASCII, and if the prepared
 * label carries the ACE prefix, decode it with Punycode and verify the result
 * by round-tripping it through uidna_toASCII(). A label without the ACE prefix
 * is copied to dest unchanged.
 *
 * @return The length of the result. When it does not fit into dest the
 *         required length is returned and u_terminateUChars() reports
 *         U_BUFFER_OVERFLOW_ERROR.
 */
U_CAPI int32_t U_EXPORT2
uidna_toUnicode(const UChar* src, int32_t srcLength,
                UChar* dest, int32_t destCapacity,
                int32_t options,
                UParseError* parseError,
                UErrorCode* status){

    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }
    if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    //get the options (the STD3 flag is only consumed by uidna_toASCII via options)
    UBool allowUnassigned = (UBool)((options & UIDNA_ALLOW_UNASSIGNED) != 0);

    // NOTE: every local is declared before the first "goto CLEANUP" so that
    // no jump crosses an initialization.
    UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];

    //initialize pointers to stack buffers
    UChar  *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
    int32_t b1Len = 0, b2Len, b1PrimeLen, b3Len,
            b1Capacity = MAX_LABEL_BUFFER_SIZE,
            b2Capacity = MAX_LABEL_BUFFER_SIZE,
            b3Capacity = MAX_LABEL_BUFFER_SIZE,
            reqLength=0;
    int32_t j;

    StringPrep* prep = StringPrep::createNameprepInstance(*status);
    UBool* caseFlags = NULL;

    UBool srcIsASCII = TRUE;

    if(U_FAILURE(*status)){
        goto CLEANUP;
    }
    // step 1: find out if all the codepoints in src are ASCII
    if(srcLength==-1){
        // NUL-terminated input: measure it while scanning
        srcLength = 0;
        for(;src[srcLength]!=0;){
            if(src[srcLength]> 0x7f){
                srcIsASCII = FALSE;
            }
            srcLength++;
        }
    }else{
        for(j=0; j<srcLength; j++){
            if(src[j]> 0x7f){
                srcIsASCII = FALSE;
            }
        }
    }

    if(srcIsASCII == FALSE){
        // step 2: process the string
        b1Len = prep->process(src,srcLength,b1,b1Capacity,allowUnassigned, parseError, *status);
        if(*status == U_BUFFER_OVERFLOW_ERROR){
            // redo processing of string
            /* we do not have enough room so grow the buffer*/
            b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
            if(b1==NULL){
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto CLEANUP;
            }

            *status = U_ZERO_ERROR; // reset error

            b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
        }
        //bail out on error
        if(U_FAILURE(*status)){
            goto CLEANUP;
        }
    }else{

        //just point src to b1 (never written through; CLEANUP does not free it)
        b1 = (UChar*) src;
        b1Len = srcLength;
    }
    //step 3: verify ACE Prefix
    // The check has to look at the same buffer the prefix is stripped from,
    // i.e. the prepared label b1, not the raw input.
    if(startsWithPrefix(b1,b1Len)){

        //step 4: Remove the ACE Prefix
        b1Prime = b1 + ACE_PREFIX_LENGTH;
        b1PrimeLen  = b1Len - ACE_PREFIX_LENGTH;

        //step 5: Decode using punycode
        b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status);

        if(*status == U_BUFFER_OVERFLOW_ERROR){
            // redo processing of string
            /* we do not have enough room so grow the buffer*/
            b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
            if(b2==NULL){
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto CLEANUP;
            }

            *status = U_ZERO_ERROR; // reset error

            b2Len =  u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status);

        }
        //bail out on error: b2/b2Len are not meaningful after a failed decode
        if(U_FAILURE(*status)){
            goto CLEANUP;
        }

        //step 6:Apply toASCII
        b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity,options,parseError, status);

        if(*status == U_BUFFER_OVERFLOW_ERROR){
            // redo processing of string
            /* we do not have enough room so grow the buffer*/
            b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
            if(b3==NULL){
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto CLEANUP;
            }

            *status = U_ZERO_ERROR; // reset error

            b3Len =  uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status);

        }
        //bail out on error
        if(U_FAILURE(*status)){
            goto CLEANUP;
        }

        //step 7: verify
        if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
            *status = U_IDNA_VERIFICATION_ERROR;
            goto CLEANUP;
        }

        //step 8: return output of step 5
        reqLength = b2Len;
        if(b2Len <= destCapacity) {
            uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR);
        }
    }else{
        //copy the source to destination
        if(srcLength <= destCapacity){
            uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
        }
        reqLength = srcLength;
    }

CLEANUP:

    if(b1 != b1Stack && b1!=src){
        uprv_free(b1);
    }
    if(b2 != b2Stack){
        uprv_free(b2);
    }
    // b3 may have been grown on the heap in step 6
    if(b3 != b3Stack){
        uprv_free(b3);
    }
    uprv_free(caseFlags);

    delete prep;

    return u_terminateUChars(dest, destCapacity, reqLength, status);
}
|
||||
|
||||
/**
 * Finds the end of the label that starts at src.
 *
 * @param src       Start of the label.
 * @param srcLength Number of UChars available at src, or -1 if the text is
 *                  NUL-terminated.
 * @param prep      StringPrep instance used to recognize label separators.
 * @param limit     Receives the start of the next label: one past the
 *                  separator, or the end of the input when none was found.
 * @param done      Set to TRUE when the end of the input was reached without
 *                  finding a separator; left untouched otherwise.
 * @param status    Passed through to StringPrep::isLabelSeparator().
 * @return The length of the label, not counting the separator.
 */
static int32_t
getNextSeparator(UChar *src,int32_t srcLength,StringPrep* prep,
                 UChar **limit,
                 UBool *done,
                 UErrorCode *status){
    int32_t i;
    for(i=0 ; ;i++){
        // End of input: the NUL for terminated strings, the explicit length
        // otherwise. ">=" also terminates for a non-positive length.
        UBool atEnd = (srcLength == -1) ? (UBool)(src[i] == 0)
                                        : (UBool)(i >= srcLength);
        if(atEnd){
            // we have not found the delimiter
            *limit = src + i;
            *done = TRUE;
            return i;
        }
        if(prep->isLabelSeparator(src[i],*status)){
            *limit = src + (i+1); // go past the delimiter
            return i;
        }
    }
}
|
||||
|
||||
/**
 * IDNToASCII: applies uidna_toASCII() to every label of a complete domain
 * name and joins the results with FULL_STOP (see unicode/uidna.h for the
 * public contract).
 *
 * @return The length of the result. When it does not fit into dest the
 *         required length is returned and u_terminateUChars() reports
 *         U_BUFFER_OVERFLOW_ERROR.
 */
U_CAPI int32_t U_EXPORT2
uidna_IDNToASCII(  const UChar* src, int32_t srcLength,
                   UChar* dest, int32_t destCapacity,
                   int32_t options,
                   UParseError* parseError,
                   UErrorCode* status){

    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }
    if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    int32_t reqLength = 0;

    StringPrep* prep = StringPrep::createNameprepInstance(*status);

    if(U_FAILURE(*status)){
        delete prep;
        return 0;
    }

    //initialize pointers to stack buffers
    UChar b1Stack[MAX_LABEL_BUFFER_SIZE];
    UChar *b1 = b1Stack;
    int32_t b1Len, labelLen;
    UChar* delimiter = (UChar*)src;
    UChar* labelStart = (UChar*)src;
    // UChars left in src from labelStart on; stays -1 for NUL-terminated input
    int32_t remainingLen = srcLength;
    int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE;
    UBool done = FALSE;

    for(;;){

        // honor an explicit srcLength instead of always scanning for a NUL
        labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter,&done, status);

        b1Len = uidna_toASCII(labelStart, labelLen, b1, b1Capacity,
                              options, parseError, status);

        if(*status == U_BUFFER_OVERFLOW_ERROR){
            // The converted label did not fit into b1, so b1 does not hold
            // b1Len valid UChars. Grow the buffer and redo the label rather
            // than copying past the end of b1 below.
            if(b1 != b1Stack){
                uprv_free(b1);
            }
            // +1 leaves room for the terminating NUL
            b1 = (UChar*) uprv_malloc((b1Len+1) * U_SIZEOF_UCHAR);
            if(b1==NULL){
                *status = U_MEMORY_ALLOCATION_ERROR;
                break;
            }
            b1Capacity = b1Len+1;

            *status = U_ZERO_ERROR; // reset error

            b1Len = uidna_toASCII(labelStart, labelLen, b1, b1Capacity,
                                  options, parseError, status);
        }

        if(U_FAILURE(*status)){
            break;
        }
        int32_t tempLen = (reqLength + b1Len );

        // copy to dest; when dest is too small only the length is tracked
        // (pre-flighting)
        if( tempLen <= destCapacity){
            uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR);
        }

        reqLength = tempLen;

        // add the label separator
        if(done==FALSE){
            if(reqLength < destCapacity){
                dest[reqLength] = FULL_STOP;
            }
            reqLength++;
        }

        // advance past the label and its separator
        if(srcLength != -1){
            remainingLen -= (int32_t)(delimiter - labelStart);
        }
        labelStart = delimiter;

        if(done == TRUE){
            break;
        }

    }

    if(b1 != b1Stack){
        uprv_free(b1);
    }

    delete prep;

    return u_terminateUChars(dest, destCapacity, reqLength, status);
}
|
||||
|
||||
/**
 * IDNToUnicode: applies uidna_toUnicode() to every label of a complete domain
 * name and joins the results with FULL_STOP (see unicode/uidna.h for the
 * public contract).
 *
 * @return The length of the result. When it does not fit into dest the
 *         required length is returned and u_terminateUChars() reports
 *         U_BUFFER_OVERFLOW_ERROR.
 */
U_CAPI int32_t U_EXPORT2
uidna_IDNToUnicode(  const UChar* src, int32_t srcLength,
                     UChar* dest, int32_t destCapacity,
                     int32_t options,
                     UParseError* parseError,
                     UErrorCode* status){

    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }
    if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    int32_t reqLength = 0;

    StringPrep* prep = StringPrep::createNameprepInstance(*status);

    if(U_FAILURE(*status)){
        delete prep;
        return 0;
    }

    //initialize pointers to stack buffers
    UChar b1Stack[MAX_LABEL_BUFFER_SIZE];
    UChar *b1 = b1Stack;
    int32_t b1Len, labelLen;
    UChar* delimiter = (UChar*)src;
    UChar* labelStart = (UChar*)src;
    // UChars left in src from labelStart on; stays -1 for NUL-terminated input
    int32_t remainingLen = srcLength;
    int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE;
    UBool done = FALSE;

    for(;;){

        // honor an explicit srcLength instead of always scanning for a NUL
        labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status);

        b1Len = uidna_toUnicode( labelStart,labelLen, b1, b1Capacity,
                                 options, parseError, status);

        if(*status == U_BUFFER_OVERFLOW_ERROR){
            // The converted label did not fit into b1, so b1 does not hold
            // b1Len valid UChars. Grow the buffer and redo the label rather
            // than copying past the end of b1 below.
            if(b1 != b1Stack){
                uprv_free(b1);
            }
            // +1 leaves room for the terminating NUL
            b1 = (UChar*) uprv_malloc((b1Len+1) * U_SIZEOF_UCHAR);
            if(b1==NULL){
                *status = U_MEMORY_ALLOCATION_ERROR;
                break;
            }
            b1Capacity = b1Len+1;

            *status = U_ZERO_ERROR; // reset error

            b1Len = uidna_toUnicode( labelStart,labelLen, b1, b1Capacity,
                                     options, parseError, status);
        }

        if(U_FAILURE(*status)){
            break;
        }
        int32_t tempLen = (reqLength + b1Len );
        // copy to dest; when dest is too small only the length is tracked
        // (pre-flighting)
        if( tempLen <= destCapacity){
            uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR);
        }

        reqLength = tempLen;
        // add the label separator
        if(done==FALSE){
            if(reqLength < destCapacity){
                dest[reqLength] = FULL_STOP;
            }
            reqLength++;
        }

        // advance past the label and its separator
        if(srcLength != -1){
            remainingLen -= (int32_t)(delimiter - labelStart);
        }
        labelStart = delimiter;

        if(done==TRUE){
            break;
        }

    }

    if(b1 != b1Stack){
        uprv_free(b1);
    }

    delete prep;

    return u_terminateUChars(dest, destCapacity, reqLength, status);
}
|
||||
|
||||
/**
 * Compares two domain names for IDN equivalence: both are converted with
 * uidna_IDNToASCII() and the ASCII forms are compared case-insensitively
 * (see unicode/uidna.h for the public contract).
 *
 * @return <0, 0 or >0 as usual for string comparisons; -1 when status
 *         indicates a failure on entry or when a conversion fails (status
 *         then carries the error).
 */
U_CAPI int32_t U_EXPORT2
uidna_compare(  const UChar *s1, int32_t length1,
                const UChar *s2, int32_t length2,
                int32_t options,
                UErrorCode* status){

    if(status == NULL || U_FAILURE(*status)){
        return -1;
    }

    UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE];
    UChar *b1 = b1Stack, *b2 = b2Stack;
    int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE;
    // returned as-is on every error path, so it must have a defined value
    int32_t result = -1;

    UParseError parseError;

    b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
    if(*status == U_BUFFER_OVERFLOW_ERROR){
        // redo processing of string
        b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
        if(b1==NULL){
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }

        *status = U_ZERO_ERROR; // reset error

        b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);

    }

    b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status);
    if(*status == U_BUFFER_OVERFLOW_ERROR){
        // redo processing of string
        b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
        if(b2==NULL){
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }

        *status = U_ZERO_ERROR; // reset error

        b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status);

    }
    // when toASCII is applied all label separators are replaced with FULL_STOP
    // Only compare when both conversions succeeded; after a failure the
    // buffers and lengths are not meaningful.
    if(U_SUCCESS(*status)){
        result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);
    }

CLEANUP:
    if(b1 != b1Stack){
        uprv_free(b1);
    }

    if(b2 != b2Stack){
        uprv_free(b2);
    }

    return result;
}
|
||||
|
282
icu4c/source/common/unicode/uidna.h
Normal file
282
icu4c/source/common/unicode/uidna.h
Normal file
|
@ -0,0 +1,282 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: uidna.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003feb1
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
|
||||
#ifndef __UIDNA_H__
|
||||
#define __UIDNA_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/parseerr.h"
|
||||
|
||||
/**
|
||||
*\file
|
||||
* UIDNA API implements the IDNA protocol as defined in the IDNA draft
|
||||
* (http://www.ietf.org/internet-drafts/draft-ietf-idn-idna-14.txt).
|
||||
* The draft defines 2 operations: ToASCII and ToUnicode. Domain labels
|
||||
* containing non-ASCII code points are required to be processed by
|
||||
* ToASCII operation before passing it to resolver libraries. Domain names
|
||||
* that are obtained from resolver libraries are required to be processed by
|
||||
* ToUnicode operation before displaying the domain name to the user.
|
||||
* IDNA requires that implementations process input strings with Nameprep
|
||||
* (http://www.ietf.org/internet-drafts/draft-ietf-idn-nameprep-11.txt),
|
||||
* which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
|
||||
* and then with Punycode (http://www.ietf.org/internet-drafts/draft-ietf-idn-punycode-03.txt).
|
||||
* Implementations of IDNA MUST fully implement Nameprep and Punycode;
|
||||
* neither Nameprep nor Punycode are optional.
|
||||
* The input and output of ToASCII and ToUnicode operations are Unicode
|
||||
* and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
|
||||
* multiple times to an input string will yield the same result as applying the operation
|
||||
* once.
|
||||
* ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
|
||||
* ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
|
||||
*\end_file
|
||||
*/
|
||||
|
||||
#define UIDNA_DEFAULT 0x0000
|
||||
#define UIDNA_ALLOW_UNASSIGNED 0x0001
|
||||
#define UIDNA_USE_STD3_RULES 0x0002
|
||||
|
||||
/**
|
||||
* This function implements the ToASCII operation as defined in the IDNA draft.
|
||||
* This operation is done on <b>single labels</b> before sending it to something that expects
|
||||
* ASCII names. A label is an individual part of a domain name. Labels are usually
|
||||
* separated by dots; e.g., "www.example.com" is composed of 3 labels
|
||||
* "www","example", and "com".
|
||||
*
|
||||
*
|
||||
* @param src Input UChar array containing label in Unicode.
|
||||
* @param srcLength Number of UChars in src, or -1 if NUL-terminated.
|
||||
* @param dest Output UChar array with ASCII (ACE encoded) label.
|
||||
* @param destCapacity Size of dest.
|
||||
* @param options A bit set of options:
|
||||
*
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR error code.
|
||||
*
|
||||
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
*
|
||||
* @param parseError Pointer to UParseError struct to receive information on position
|
||||
* of error if an error is encountered. Can be NULL.
|
||||
* @param status ICU in/out error code parameter.
|
||||
* U_INVALID_CHAR_FOUND if src contains
|
||||
* unmatched single surrogates.
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR if src contains
|
||||
* too many code points.
|
||||
* U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
|
||||
* @return Number of ASCII characters converted.
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uidna_toASCII(const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
int32_t options,
|
||||
UParseError* parseError,
|
||||
UErrorCode* status);
|
||||
|
||||
|
||||
/**
|
||||
* This function implements the ToUnicode operation as defined in the IDNA draft.
|
||||
* This operation is done on <b>single labels</b> before sending it to something that expects
|
||||
* Unicode names. A label is an individual part of a domain name. Labels are usually
|
||||
* separated by dots; e.g., "www.example.com" is composed of 3 labels
|
||||
* "www","example", and "com".
|
||||
*
|
||||
* @param src Input UChar array containing ASCII (ACE encoded) label.
|
||||
* @param srcLength Number of UChars in src, or -1 if NUL-terminated.
|
||||
* @param dest Output Converted UChar array containing Unicode equivalent of label.
|
||||
* @param destCapacity Size of dest.
|
||||
* @param options A bit set of options:
|
||||
*
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR error code.
|
||||
*
|
||||
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points. <b> Note: </b> This option is
|
||||
* required on toUnicode operation because the draft mandates
|
||||
* verification of decoded ACE input by applying toASCII and comparing
|
||||
* its output with source
|
||||
*
|
||||
*
|
||||
*
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
*
|
||||
* @param parseError Pointer to UParseError struct to receive information on position
|
||||
* of error if an error is encountered. Can be NULL.
|
||||
* @param status ICU in/out error code parameter.
|
||||
* U_INVALID_CHAR_FOUND if src contains
|
||||
* unmatched single surrogates.
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR if src contains
|
||||
* too many code points.
|
||||
* U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
|
||||
* @return Number of Unicode characters converted.
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uidna_toUnicode(const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
int32_t options,
|
||||
UParseError* parseError,
|
||||
UErrorCode* status);
|
||||
|
||||
|
||||
/**
|
||||
* Convenience function that implements the IDNToASCII operation as defined in the IDNA draft.
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
* It is important to note that this operation can fail. If it fails, then the input
|
||||
* domain name cannot be used as an Internationalized Domain Name and the application
|
||||
* should have methods defined to deal with the failure.
|
||||
*
|
||||
* <b>Note:</b> IDNA draft specifies that a conformant application should divide a domain name
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* set will apply to all labels in the domain name
|
||||
*
|
||||
* @param src Input UChar array containing IDN in Unicode.
|
||||
* @param srcLength Number of UChars in src, or -1 if NUL-terminated.
|
||||
* @param dest Output UChar array with ASCII (ACE encoded) IDN.
|
||||
* @param destCapacity Size of dest.
|
||||
* @param options A bit set of options:
|
||||
*
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR error code.
|
||||
*
|
||||
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
*
|
||||
* @param parseError Pointer to UParseError struct to receive information on position
|
||||
* of error if an error is encountered. Can be NULL.
|
||||
* @param status ICU in/out error code parameter.
|
||||
* U_INVALID_CHAR_FOUND if src contains
|
||||
* unmatched single surrogates.
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR if src contains
|
||||
* too many code points.
|
||||
* U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
|
||||
* @return Number of ASCII characters converted.
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uidna_IDNToASCII( const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
int32_t options,
|
||||
UParseError* parseError,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Convenience function that implements the IDNToUnicode operation as defined in the IDNA draft.
|
||||
* This operation is done on complete domain names, e.g: "www.example.com".
|
||||
*
|
||||
* <b>Note:</b> IDNA draft specifies that a conformant application should divide a domain name
|
||||
* into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
|
||||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* set will apply to all labels in the domain name
|
||||
*
|
||||
* @param src Input UChar array containing IDN in ASCII (ACE encoded) form.
|
||||
* @param srcLength Number of UChars in src, or -1 if NUL-terminated.
|
||||
* @param dest Output UChar array containing Unicode equivalent of source IDN.
|
||||
* @param destCapacity Size of dest.
|
||||
* @param options A bit set of options:
|
||||
*
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR error code.
|
||||
*
|
||||
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
*
|
||||
* @param parseError Pointer to UParseError struct to receive information on position
|
||||
* of error if an error is encountered. Can be NULL.
|
||||
* @param status ICU in/out error code parameter.
|
||||
* U_INVALID_CHAR_FOUND if src contains
|
||||
* unmatched single surrogates.
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR if src contains
|
||||
* too many code points.
|
||||
* U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
|
||||
* @return Number of Unicode characters converted.
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
int32_t options,
|
||||
UParseError* parseError,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Compare two strings for IDNs for equivalence.
|
||||
* This function splits the domain names into labels and compares them.
|
||||
* According to IDN draft, whenever two labels are compared, they are
|
||||
* considered equal if and only if their ASCII forms (obtained by
|
||||
* applying toASCII) match using a case-insensitive ASCII comparison.
|
||||
* Two domain names are considered a match if and only if all labels
|
||||
* match regardless of whether label separators match.
|
||||
*
|
||||
* @param s1 First source string.
|
||||
* @param length1 Length of first source string, or -1 if NUL-terminated.
|
||||
*
|
||||
* @param s2 Second source string.
|
||||
* @param length2 Length of second source string, or -1 if NUL-terminated.
|
||||
* @param options A bit set of options:
|
||||
*
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR error code.
|
||||
*
|
||||
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
*
|
||||
* @param status ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return <0 or 0 or >0 as usual for string comparisons
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uidna_compare( const UChar *s1, int32_t length1,
|
||||
const UChar *s2, int32_t length2,
|
||||
int32_t options,
|
||||
UErrorCode* status);
|
||||
|
||||
#endif
|
|
@ -631,8 +631,21 @@ typedef enum UErrorCode {
|
|||
U_REGEX_INVALID_BACK_REF, /**< Back-reference to a non-existent capture group. */
|
||||
U_REGEX_INVALID_FLAG, /**< Invalid value for match mode flags. */
|
||||
U_REGEX_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for regexp errors */
|
||||
|
||||
/*
|
||||
* The error code in the range 0x10400-0x104ff are reserved for IDNA related error codes
|
||||
*/
|
||||
U_IDNA_ERROR_START=0x10400,
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR,
|
||||
U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR,
|
||||
U_IDNA_CHECK_BIDI_ERROR,
|
||||
U_IDNA_STD3_ASCII_RULES_ERROR,
|
||||
U_IDNA_ACE_PREFIX_ERROR,
|
||||
U_IDNA_VERIFICATION_ERROR,
|
||||
U_IDNA_LABEL_TOO_LONG_ERROR,
|
||||
U_IDNA_ERROR_LIMIT,
|
||||
|
||||
U_ERROR_LIMIT=U_REGEX_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
|
||||
U_ERROR_LIMIT=U_IDNA_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
|
||||
} UErrorCode;
|
||||
|
||||
/* Use the following to determine if an UErrorCode represents */
|
||||
|
|
Loading…
Add table
Reference in a new issue