ICU-3050 StringPrep API first cut

X-SVN-Rev: 12674
This commit is contained in:
Ram Viswanadha 2003-07-24 23:23:19 +00:00
parent 1349672e9b
commit 9a043c2e5f
57 changed files with 7672 additions and 3692 deletions

8
.gitignore vendored
View file

@ -331,6 +331,14 @@ icu4c/source/tools/genrb/genjp/genjp.positions
icu4c/source/tools/genrb/genrb
icu4c/source/tools/genrb/genrb.[0-9]
icu4c/source/tools/genrb/tmp
icu4c/source/tools/gensprep/*.d
icu4c/source/tools/gensprep/*.pdb
icu4c/source/tools/gensprep/Debug
icu4c/source/tools/gensprep/Makefile
icu4c/source/tools/gensprep/Release
icu4c/source/tools/gensprep/gensprep
icu4c/source/tools/gensprep/gensprep.8
icu4c/source/tools/gensprep/gensprep.plg
icu4c/source/tools/gentest/*.d
icu4c/source/tools/gentest/*.pdb
icu4c/source/tools/gentest/Debug

View file

@ -96,10 +96,10 @@ Package=<4>
Project_Dep_Name genbrk
End Project Dependency
Begin Project Dependency
Project_Dep_Name genidna
Project_Dep_Name layoutex
End Project Dependency
Begin Project Dependency
Project_Dep_Name layoutex
Project_Dep_Name gensprep
End Project Dependency
}}}
@ -267,24 +267,6 @@ Package=<4>
###############################################################################
Project: "genidna"=..\tools\genidna\genidna.dsp - Package Owner=<4>
Package=<5>
{{{
}}}
Package=<4>
{{{
Begin Project Dependency
Project_Dep_Name common
End Project Dependency
Begin Project Dependency
Project_Dep_Name toolutil
End Project Dependency
}}}
###############################################################################
Project: "gennames"=..\tools\gennames\gennames.dsp - Package Owner=<4>
Package=<5>
@ -381,6 +363,18 @@ Package=<4>
###############################################################################
Project: "gensprep"=..\tools\gensprep\gensprep.dsp - Package Owner=<4>
Package=<5>
{{{
}}}
Package=<4>
{{{
}}}
###############################################################################
Project: "gentest"=..\tools\gentest\gentest.dsp - Package Owner=<4>
Package=<5>
@ -606,7 +600,7 @@ Package=<4>
Project_Dep_Name genbrk
End Project Dependency
Begin Project Dependency
Project_Dep_Name genidna
Project_Dep_Name gensprep
End Project Dependency
}}}

View file

@ -3302,14 +3302,6 @@ InputPath=.\unicode\utf_old.h
# PROP Default_Filter "*.c,*.h"
# Begin Source File
SOURCE=.\nameprep.cpp
# End Source File
# Begin Source File
SOURCE=.\nameprep.h
# End Source File
# Begin Source File
SOURCE=.\punycode.c
# End Source File
# Begin Source File
@ -3318,18 +3310,6 @@ SOURCE=.\punycode.h
# End Source File
# Begin Source File
SOURCE=.\sprpimpl.h
# End Source File
# Begin Source File
SOURCE=.\strprep.cpp
# End Source File
# Begin Source File
SOURCE=.\strprep.h
# End Source File
# Begin Source File
SOURCE=.\uidna.cpp
# End Source File
# Begin Source File
@ -3354,6 +3334,41 @@ InputPath=.\unicode\uidna.h
!ENDIF
# End Source File
# End Group
# Begin Group "sprep"
# PROP Default_Filter ""
# Begin Source File
SOURCE=.\sprpimpl.h
# End Source File
# Begin Source File
SOURCE=.\usprep.cpp
# End Source File
# Begin Source File
SOURCE=.\unicode\usprep.h
!IF "$(CFG)" == "common - Win32 Release"
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode\usprep.h
"..\..\include\unicode\usprep.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputPath) ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win64 Release"
!ELSEIF "$(CFG)" == "common - Win64 Debug"
!ENDIF
# End Source File
# End Group
# End Target

View file

@ -1,38 +0,0 @@
/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: nameprep.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003feb1
* created by: Ram Viswanadha
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_IDNA
#include "nameprep.h"
// *****************************************************************************
// class NamePrep
// *****************************************************************************
U_NAMESPACE_BEGIN
const char NamePrep::fgClassID=0;
// Constructor for the Nameprep profile: switches on both NFKC normalization
// and BiDi checking. The status argument is accepted but neither read nor
// written here.
NamePrep::NamePrep(UErrorCode& /* status */)
{
    doNFKC    = TRUE;
    bidiCheck = TRUE;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_IDNA */

View file

@ -1,102 +0,0 @@
/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: nameprep.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003feb1
* created by: Ram Viswanadha
*/
#ifndef NAMEPREP_H
#define NAMEPREP_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_IDNA
#include "strprep.h"
#include "unicode/uniset.h"
U_NAMESPACE_BEGIN
/*
A profile of stringprep MUST include all of the following:
- The intended applicability of the profile
- The character repertoire that is the input and output to stringprep
(which is Unicode 3.2 for this version of stringprep)
- The mapping tables from this document used (as described in section
3)
- Any additional mapping tables specific to the profile
- The Unicode normalization used, if any (as described in section 4)
- The tables from this document of characters that are prohibited as
output (as described in section 5)
- The bidirectional string testing used, if any (as described in
section 6)
- Any additional characters that are prohibited as output specific to
the profile
*/
class NamePrep: public StringPrep {
public:
    /**
     * Creates a Nameprep profile object.
     *
     * @param status ICU error code in/out parameter.
     */
    NamePrep(UErrorCode& status);

    /** Destructor; the class owns nothing that needs releasing. */
    virtual inline ~NamePrep() {}

    /**
     * Override of the StringPrep hook; the inline definition follows the
     * class body.
     *
     * @param ch code point to test
     */
    virtual inline UBool isNotProhibited(UChar32 ch);

    /**
     * ICU "poor man's RTTI": class-wide ID, i.e. the address of fgClassID.
     *
     * @draft ICU 2.6
     */
    static inline UClassID getStaticClassID() {
        return (UClassID)&fgClassID;
    }

    /**
     * ICU "poor man's RTTI": ID of the object's actual class; forwards to
     * getStaticClassID().
     *
     * @draft ICU 2.6
     */
    virtual inline UClassID getDynamicClassID() const {
        return getStaticClassID();
    }

private:
    /**
     * Only the address of this variable matters: it is the class ID handed
     * out by getStaticClassID().
     */
    static const char fgClassID;
};
/* Answers TRUE for exactly one code point, U+0020 (ASCII space). */
inline UBool NamePrep::isNotProhibited(UChar32 ch) {
    const UChar32 asciiSpace = 0x0020;
    return (UBool)(asciiSpace == ch);
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_IDNA */
#endif
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/

View file

@ -2367,8 +2367,8 @@ _uRegexErrorName[U_REGEX_ERROR_LIMIT - U_REGEX_ERROR_START] = {
static const char * const
_uIDNAErrorName[U_IDNA_ERROR_LIMIT - U_IDNA_ERROR_START] = {
"U_IDNA_ERROR_START",
"U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR",
"U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR",
"U_IDNA_PROHIBITED_ERROR",
"U_IDNA_UNASSIGNED_ERROR",
"U_IDNA_CHECK_BIDI_ERROR",
"U_IDNA_STD3_ASCII_RULES_ERROR",
"U_IDNA_ACE_PREFIX_ERROR",

View file

@ -237,6 +237,7 @@ void RBBISetBuilder::build() {
NULL, // Data array (utrie will allocate one)
100000, // Max Data Length
0, // Initial value for all code points
0, // Lead surrogate unit value
TRUE); // Keep Latin 1 in separately

View file

@ -20,82 +20,136 @@
#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "unicode/parseerr.h"
#include "unicode/usprep.h"
#include "unicode/udata.h"
#include "utrie.h"
#if !UCONFIG_NO_IDNA
enum{
UIDNA_NO_VALUE = 0x0000 ,
UIDNA_UNASSIGNED = 0x0001 ,
UIDNA_PROHIBITED = 0x0002 ,
UIDNA_MAP_NFKC = 0x0003 ,
UIDNA_LABEL_SEPARATOR = 0x0004
typedef enum UStringPrepType UStringPrepType;
#define _SPREP_DATA_TYPE "spp"
enum UStringPrepType{
USPREP_UNASSIGNED = 0x0000 ,
USPREP_MAP = 0x0001 ,
USPREP_PROHIBITED = 0x0002 ,
USPREP_LABEL_SEPARATOR = 0x0003 ,
USPREP_DELETE = 0x0004 ,
USPREP_TYPE_LIMIT = 0x0005 ,
};
static const char* usprepTypeNames[] ={
"UNASSIGNED" ,
"MAP" ,
"PROHIBITED" ,
"LABEL_SEPARATOR" ,
"DELETE",
"TYPE_LIMIT"
};
enum{
_IDNA_LENGTH_IN_MAPPING_TABLE = 0x0003 /*11*/
_SPREP_NORMALIZATION_ON = 0x0001,
_SPREP_CHECK_BIDI_ON = 0x0002,
};
enum{
_SPREP_TYPE_THRESHOLD = 0xFFF0,
_SPREP_MAX_INDEX_VALUE = 0x3FBF, /*16139*/
_SPREP_MAX_INDEX_TOP_LENGTH = 0x0003
};
/* indexes[] value names */
enum {
_IDNA_INDEX_TRIE_SIZE, /* number of bytes in normalization trie */
_IDNA_INDEX_MAPPING_DATA_SIZE, /* The array that contains the mapping */
_IDNA_INDEX_TOP=3 /* changing this requires a new formatVersion */
_SPREP_INDEX_TRIE_SIZE = 0, /* number of bytes in normalization trie */
_SPREP_INDEX_MAPPING_DATA_SIZE = 1, /* The array that contains the mapping */
_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION = 2, /* The index of Unicode version of last entry in NormalizationCorrections.txt */
_SPREP_ONE_UCHAR_MAPPING_INDEX_START = 3, /* The starting index of 1 UChar mapping index in the mapping data array */
_SPREP_TWO_UCHARS_MAPPING_INDEX_START = 4, /* The starting index of 2 UChars mapping index in the mapping data array */
_SPREP_THREE_UCHARS_MAPPING_INDEX_START = 5,
_SPREP_FOUR_UCHARS_MAPPING_INDEX_START = 6,
_SPREP_OPTIONS = 7, /* Bit set of options to turn on in the profile */
_SPREP_INDEX_TOP=16 /* changing this requires a new formatVersion */
};
enum {
_IDNA_MAPPING_DATA_SIZE = 2000,
_IDNA_MAP_TO_NOTHING = 0x7FF
typedef struct UStringPrepKey UStringPrepKey;
struct UStringPrepKey{
char* name;
char* path;
};
#if defined(XP_CPLUSPLUS)
static inline
void uprv_syntaxError(const UChar* rules,
struct UStringPrepProfile{
int32_t indexes[_SPREP_INDEX_TOP];
UTrie sprepTrie;
const uint16_t* mappingData;
UDataMemory* sprepData;
UBool isDataLoaded;
int32_t refCount;
};
/**
* Helper function for populating the UParseError struct
* @internal
*/
U_CAPI void U_EXPORT2
uprv_syntaxError(const UChar* rules,
int32_t pos,
int32_t rulesLen,
UParseError* parseError)
{
if(parseError == NULL){
return;
}
if(pos == rulesLen && rulesLen >0){
pos--;
}
parseError->offset = pos;
parseError->line = 0 ; // we are not using line numbers
// for pre-context
int32_t start = (pos <=U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
int32_t stop = pos;
u_memcpy(parseError->preContext,rules+start,stop-start);
//null terminate the buffer
parseError->preContext[stop-start] = 0;
//for post-context
start = pos;
if(start<rulesLen) {
U16_FWD_1(rules, start, rulesLen);
}
UParseError* parseError);
/**
* Map every character in input stream with mapping character
* in the mapping table and populate the output stream.
* For any individual character the mapping table may specify
* that a character be mapped to nothing, mapped to one
* other character or to a string of other characters.
*
* @param prep Profile to use
* @param src Pointer to UChar buffer containing a single label
* @param srcLength Number of characters in the source label
* @param dest Pointer to the destination buffer to receive the output
* @param destCapacity The capacity of destination array
* @param options
* @param parseError
* @param status ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return The number of UChars in the destination buffer
*/
U_CFUNC int32_t
usprep_map( UStringPrepProfile* prep,
const UChar* src, int32_t srcLength,
UChar* dest, int32_t destCapacity,
int32_t options,
UParseError* parseError,
UErrorCode* status );
/**
* Normalize the input stream using Normalization Form KC (NFKC)
*
* @param prep Profile to use
* @param src Pointer to UChar buffer containing a single label
* @param srcLength Number of characters in the source label
* @param dest Pointer to the destination buffer to receive the output
* @param destCapacity The capacity of destination array
* @param status ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return The number of UChars in the destination buffer
*/
U_CFUNC int32_t
usprep_normalize( UStringPrepProfile* prep,
const UChar* src, int32_t srcLength,
UChar* dest, int32_t destCapacity,
UErrorCode* status );
U_CFUNC UBool
usprep_isLabelSeparator(UStringPrepProfile* profile,
UChar32 ch, UErrorCode* status);
stop = ((pos+U_PARSE_CONTEXT_LEN)<= rulesLen )? (pos+(U_PARSE_CONTEXT_LEN)) :
rulesLen;
if(start < stop){
u_memcpy(parseError->postContext,rules+start,stop-start);
//null terminate the buffer
parseError->postContext[stop-start]= 0;
}
}
#endif
/* error codes for prototyping
#define U_IDNA_ERROR_START U_ERROR_LIMIT
#define U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 1))
#define U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 2))
#define U_IDNA_CHECK_BIDI_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 3))
#define U_IDNA_STD3_ASCII_RULES_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 4))
#define U_IDNA_ACE_PREFIX_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 5))
#define U_IDNA_VERIFICATION_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 6))
#define U_IDNA_LABEL_TOO_LONG_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 8))
*/
#endif /* #if !UCONFIG_NO_IDNA */

View file

@ -1,513 +0,0 @@
/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: strprep.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003feb1
* created by: Ram Viswanadha
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_IDNA
#include "strprep.h"
#include "utrie.h"
#include "umutex.h"
#include "cmemory.h"
#include "sprpimpl.h"
#include "nameprep.h"
#include "ustr_imp.h"
#include "unicode/unorm.h"
#include "unicode/udata.h"
#include "unicode/ustring.h"
static const uint16_t* mappingData = NULL;
static int32_t indexes[_IDNA_INDEX_TOP]={ 0 };
static UBool _isDataLoaded = FALSE;
static UTrie idnTrie={ 0,0,0,0,0,0,0 };
static UDataMemory* idnData=NULL;
static UErrorCode dataErrorCode =U_ZERO_ERROR;
/* file definitions */
static const char DATA_NAME[] = "uidna";
static const char DATA_TYPE[] = "icu";
/*
 * Releases the loaded data file (if any) and returns the file-level loader
 * state (handle, cached error code, loaded flag) to its initial values.
 * Always returns TRUE.
 */
U_CFUNC UBool
ustrprep_cleanup() {
    UDataMemory *loaded = idnData;

    if(loaded != NULL) {
        udata_close(loaded);
        idnData = NULL;
    }

    dataErrorCode = U_ZERO_ERROR;
    _isDataLoaded = FALSE;

    return TRUE;
}
U_CDECL_BEGIN
/*
 * Callback given to udata_openChoice(): accepts a data item only when every
 * header field tested below matches what this build expects.
 */
static UBool U_CALLCONV
isAcceptable(void * /* context */,
             const char * /* type */,
             const char * /* name */,
             const UDataInfo *pInfo) {
    /* size field, endianness and charset family */
    if(pInfo->size<20 ||
       pInfo->isBigEndian!=U_IS_BIG_ENDIAN ||
       pInfo->charsetFamily!=U_CHARSET_FAMILY) {
        return FALSE;
    }

    /* dataFormat="IDNA" 0x49, 0x44, 0x4e, 0x41 */
    if(pInfo->dataFormat[0]!=0x49 ||
       pInfo->dataFormat[1]!=0x44 ||
       pInfo->dataFormat[2]!=0x4e ||
       pInfo->dataFormat[3]!=0x41) {
        return FALSE;
    }

    /* format version 2, built with the same trie shift constants */
    if(pInfo->formatVersion[0]!=2 ||
       pInfo->formatVersion[2]!=UTRIE_SHIFT ||
       pInfo->formatVersion[3]!=UTRIE_INDEX_SHIFT) {
        return FALSE;
    }

    return TRUE;
}
/*
 * Folding-offset callback installed on the trie by loadData().
 * When bit 15 of the data word is set the low 15 bits are returned,
 * otherwise the result is 0.
 */
static int32_t U_CALLCONV
getFoldingOffset(uint32_t data) {
    const uint32_t hasOffsetBit = 0x8000;
    const uint32_t offsetMask   = 0x7fff;

    return (data & hasOffsetBit) ? (int32_t)(data & offsetMask) : 0;
}
U_CDECL_END
/*
 * Loads the IDNA data item (DATA_NAME/DATA_TYPE) on first use and publishes
 * it through the file-level statics idnData, indexes, idnTrie and mappingData.
 *
 * Layout read from the item, as used below: _IDNA_INDEX_TOP int32_t indexes,
 * then the serialized trie (indexes[_IDNA_INDEX_TRIE_SIZE] bytes), then the
 * uint16_t mapping data.
 *
 * @param errorCode in/out ICU error code; a failure from opening the item or
 *                  from unserializing the trie is also stored in dataErrorCode.
 * @return the value of _isDataLoaded on exit (FALSE/0 on any failure path).
 *
 * NOTE(review): mappingData and _isDataLoaded are written after the mutex has
 * been released - confirm that this is safe for concurrent first-time callers.
 */
static UBool U_CALLCONV
loadData(UErrorCode &errorCode) {
/* load Unicode IDNA data from file */
UBool isCached;
/* do this because double-checked locking is broken */
/* the flag is therefore read only while holding the mutex */
umtx_lock(NULL);
isCached=_isDataLoaded;
umtx_unlock(NULL);
if(!isCached) {
UTrie _idnTrie={ 0,0,0,0,0,0,0 };
UDataMemory *data;
const int32_t *p=NULL;
const uint8_t *pb;
/* NOTE(review): errorCode is a reference, so the &errorCode==NULL half of this test cannot be true in well-formed code */
if(&errorCode==NULL || U_FAILURE(errorCode)) {
return 0;
}
/* open the data outside the mutex block */
//TODO: change the path
data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errorCode);
/* remember the outcome so that isDataLoaded() can report it later */
dataErrorCode=errorCode;
if(U_FAILURE(errorCode)) {
return _isDataLoaded=FALSE;
}
/* p -> indexes[], pb -> serialized trie that follows the indexes */
p=(const int32_t *)udata_getMemory(data);
pb=(const uint8_t *)(p+_IDNA_INDEX_TOP);
/* build the trie in a local copy first; it is copied into idnTrie under the mutex below */
utrie_unserialize(&_idnTrie, pb, p[_IDNA_INDEX_TRIE_SIZE], &errorCode);
_idnTrie.getFoldingOffset=getFoldingOffset;
if(U_FAILURE(errorCode)) {
dataErrorCode=errorCode;
udata_close(data);
return _isDataLoaded=FALSE;
}
/* in the mutex block, set the data for this process */
umtx_lock(NULL);
if(idnData==NULL) {
/* first one in: hand ownership of the item to idnData (data=NULL marks that) */
idnData=data;
data=NULL;
uprv_memcpy(&indexes, p, sizeof(indexes));
uprv_memcpy(&idnTrie, &_idnTrie, sizeof(UTrie));
} else {
/* somebody else already published an item: point p at that one instead */
p=(const int32_t *)udata_getMemory(idnData);
}
umtx_unlock(NULL);
/* initialize some variables */
/* mapping data starts right after the trie bytes that follow the indexes */
mappingData=(uint16_t *)((uint8_t *)(p+_IDNA_INDEX_TOP)+indexes[_IDNA_INDEX_TRIE_SIZE]);
_isDataLoaded = TRUE;
/* if a different thread set it first, then close the extra data */
if(data!=NULL) {
udata_close(data); /* NULL if it was set correctly */
}
}
return _isDataLoaded;
}
// *****************************************************************************
// class StringPrep
// *****************************************************************************
U_NAMESPACE_BEGIN
const char StringPrep::fgClassID=0;
// Makes sure the data file is available, loading it on demand.
// Returns FALSE without touching anything when status already holds a
// failure; otherwise returns TRUE on success, or FALSE with status set to
// the cached load error (dataErrorCode).
UBool StringPrep::isDataLoaded(UErrorCode& status){
    if(U_FAILURE(status)){
        return FALSE;
    }

    // An earlier attempt that failed is not retried; its error is reported again.
    const UBool earlierLoadFailed =
        (UBool)(_isDataLoaded==FALSE && U_FAILURE(dataErrorCode));

    if(!earlierLoadFailed){
        loadData(dataErrorCode);
    }

    if(U_FAILURE(dataErrorCode)){
        status = dataErrorCode;
        return FALSE;
    }
    return TRUE;
}
// Factory for the default StringPrep profile.
//
// @param status ICU error code in/out parameter. Nothing is created when it
//               already holds a failure. Set to the data-loading error when
//               the data file cannot be loaded, or to
//               U_MEMORY_ALLOCATION_ERROR when the object cannot be allocated.
// @return a new StringPrep owned by the caller, or NULL on any failure.
StringPrep* StringPrep::createDefaultInstance(UErrorCode& status){
    // Check the data first so that no object is allocated just to be
    // thrown away when the data file is missing.
    if(!isDataLoaded(status)){
        return NULL;
    }

    StringPrep* strprep = new StringPrep();
    // The allocation result was previously unchecked: a NULL from new was
    // returned together with a success status. (Assumes ICU's non-throwing
    // operator new for UObject subclasses.)
    if(strprep == NULL){
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    return strprep;
}
// Factory for the Nameprep profile.
//
// @param status ICU error code in/out parameter. Nothing is created when it
//               already holds a failure. Set to the data-loading error when
//               the data file cannot be loaded, or to
//               U_MEMORY_ALLOCATION_ERROR when the object cannot be allocated.
// @return a new NamePrep (as StringPrep*) owned by the caller, or NULL on
//         any failure.
StringPrep* StringPrep::createNameprepInstance(UErrorCode& status){
    // Check the data first so that no object is allocated just to be
    // thrown away when the data file is missing.
    if(!isDataLoaded(status)){
        return NULL;
    }

    StringPrep* strprep = new NamePrep(status);
    // The allocation result was previously unchecked: a NULL from new was
    // returned together with a success status. (Assumes ICU's non-throwing
    // operator new for UObject subclasses.)
    if(strprep == NULL){
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    // The constructor receives status; honour a failure it may report,
    // as the original did by testing status after construction.
    if(U_FAILURE(status)){
        delete strprep;
        return NULL;
    }
    return strprep;
}
// Base-class hook: exempts no code point, so the answer is FALSE for every
// input. Subclasses override it to lift the prohibition for selected code
// points.
UBool StringPrep::isNotProhibited(UChar32 /* ch */)
{
    return FALSE;
}
// Looks up ch in the trie and reports whether the stored word equals
// UIDNA_UNASSIGNED.
// NOTE(review): the whole 16-bit word is compared here, whereas map() and
// isLabelSeparator() look only at the low three flag bits - confirm that
// unassigned entries never carry length/index bits.
UBool StringPrep::isUnassigned(UChar32 ch){
    uint32_t trieWord = 0;
    UTRIE_GET16(&idnTrie, ch, trieWord);
    return (UIDNA_UNASSIGNED == trieWord);
}
/*
 * Splits a trie data word into its three packed fields.
 *
 *   bits 0..2   -> flag
 *   bits 3..4   -> length
 *   bits 5..    -> index (everything above bit 4)
 */
static inline void getValues(uint32_t result, int8_t& flag,
                             int8_t& length, int32_t& index){
    const uint32_t flagMask   = 0x07;
    const uint32_t lengthMask = 0x03;

    index  = (result >> 5);
    length = (int8_t) ((result >> 3) & lengthMask);
    flag   = (int8_t) (result & flagMask);
}
// Stores code point c as UTF-16 at dest[destIndex] when its complete encoding
// fits below destCapacity, and returns the index advanced by the number of
// code units c needs. The index advances even when nothing was stored, which
// is what makes pre-flighting work.
static inline int32_t
strprepAppendCodePoint(UChar* dest, int32_t destIndex, int32_t destCapacity, UChar32 c){
    if(c <= 0xFFFF){
        if(destIndex < destCapacity ){
            dest[destIndex] = (UChar)c;
        }
        return destIndex + 1;
    }
    if(destIndex+1 < destCapacity ){
        dest[destIndex]   = U16_LEAD(c);
        dest[destIndex+1] = U16_TRAIL(c);
    }
    return destIndex + 2;
}

// Mapping step: walks src one code point at a time and writes either the code
// point itself or its replacement from mappingData into dest.
//
// - An unassigned code point is copied through when allowUnassigned is TRUE;
//   otherwise parseError is filled in, status becomes
//   U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR and 0 is returned.
// - The mapping table is used when the entry is flagged UIDNA_MAP_NFKC and
//   doNFKC is TRUE, or when the entry's index is _IDNA_MAP_TO_NOTHING and
//   doNFKC is FALSE.
// - Everything else is copied unchanged.
//
// srcLength may be -1 for a NUL-terminated src. The return value is produced
// by u_terminateUChars() from the number of UChars the output needs.
int32_t StringPrep::map(const UChar* src, int32_t srcLength,
                        UChar* dest, int32_t destCapacity,
                        UBool allowUnassigned,
                        UParseError* parseError,
                        UErrorCode& status ){

    // nothing to do if the caller already has an error
    if(U_FAILURE(status)){
        return 0;
    }
    // argument sanity
    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(srcLength == -1){
        srcLength = u_strlen(src);
    }

    uint32_t result;
    int8_t   flag;
    int8_t   length;
    int32_t  index;
    int32_t  destIndex=0;
    int32_t  srcIndex=0;

    while(srcIndex<srcLength){
        UChar32 ch;
        U16_NEXT(src,srcIndex,srcLength,ch);
        UTRIE_GET16(&idnTrie,ch,result);
        getValues(result,flag,length,index);

        if(flag == UIDNA_UNASSIGNED){
            if(allowUnassigned != TRUE){
                // report where the offending code point starts
                uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
                status = U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR;
                return 0;
            }
            destIndex = strprepAppendCodePoint(dest, destIndex, destCapacity, ch);
            continue;
        }

        const UBool useTable =
            (flag == UIDNA_MAP_NFKC && doNFKC == TRUE) ||
            (index == _IDNA_MAP_TO_NOTHING && doNFKC == FALSE);

        if(!useTable){
            destIndex = strprepAppendCodePoint(dest, destIndex, destCapacity, ch);
            continue;
        }

        // the length marker means the real length is stored in the table
        // itself, immediately before the mapped units
        if(length == _IDNA_LENGTH_IN_MAPPING_TABLE){
            length = (int8_t) mappingData[index++];
        }
        for(int8_t i =0; i< length; i++){
            if(destIndex < destCapacity ){
                dest[destIndex] = mappingData[index+i];
            }
            destIndex++; /* for pre-flighting */
        }
    }

    return u_terminateUChars(dest, destCapacity, destIndex, &status);
}
// Normalization step: forwards to unorm_normalize() with mode UNORM_NFKC and
// the UNORM_UNICODE_3_2 option, and returns whatever length it reports.
int32_t StringPrep::normalize( const UChar* src, int32_t srcLength,
                               UChar* dest, int32_t destCapacity,
                               UErrorCode& status ){
    const int32_t normalizedLength =
        unorm_normalize(src,srcLength,
                        UNORM_NFKC,UNORM_UNICODE_3_2,
                        dest,destCapacity,
                        &status);
    return normalizedLength;
}
/*
1) Map -- For each character in the input, check if it has a mapping
and, if so, replace it with its mapping.
2) Normalize -- Possibly normalize the result of step 1 using Unicode
normalization.
3) Prohibit -- Check for any characters that are not allowed in the
output. If any are found, return an error.
4) Check bidi -- Possibly check for right-to-left characters, and if
any are found, make sure that the whole string satisfies the
requirements for bidirectional strings. If the string does not
satisfy the requirements for bidirectional strings, return an
error.
[Unicode3.2] defines several bidirectional categories; each character
has one bidirectional category assigned to it. For the purposes of
the requirements below, an "RandALCat character" is a character that
has Unicode bidirectional categories "R" or "AL"; an "LCat character"
is a character that has Unicode bidirectional category "L". Note
that there are many characters which fall in neither of the above
definitions; Latin digits (<U+0030> through <U+0039>) are examples of
this because they have bidirectional category "EN".
In any profile that specifies bidirectional character handling, all
three of the following requirements MUST be met:
1) The characters in section 5.8 MUST be prohibited.
2) If a string contains any RandALCat character, the string MUST NOT
contain any LCat character.
3) If a string contains any RandALCat character, a RandALCat
character MUST be the first character of the string, and a
RandALCat character MUST be the last character of the string.
*/
#define MAX_STACK_BUFFER_SIZE 300
// Runs the complete preparation pipeline on src:
//   1) map()        -> b1
//   2) normalize()  -> b2
//   3) reject prohibited code points (unless isNotProhibited() exempts them)
//   4) BiDi requirements 2 and 3 from the comment block above
// and copies the result into dest when it fits.
//
// @param src             input text; must not be NULL
// @param srcLength       number of UChars in src, or -1 if NUL-terminated
// @param dest            output buffer; may be NULL only if destCapacity is 0
// @param destCapacity    capacity of dest in UChars
// @param allowUnassigned passed through to map()
// @param parseError      receives the error position/context when a
//                        prohibited, unassigned or BiDi-violating code point
//                        is found
// @param status          ICU error code in/out parameter
// @return the result of u_terminateUChars() for the normalized length
//         (0 if an argument check fails)
//
// Changes against the earlier version:
//  - the normalization retry now reads from b1 (it used to normalize the
//    freshly allocated, uninitialised b2 into itself);
//  - the BiDi requirement-3 failure goes through CLEANUP instead of
//    returning directly, so heap buffers are no longer leaked;
//  - the prohibited-code-point context is taken from b2, the buffer the
//    offset refers to (it used b1);
//  - BiDi error positions point at the start of the code point rather than
//    at its last code unit;
//  - a failure from map() skips straight to CLEANUP.
int32_t StringPrep::process(const UChar* src, int32_t srcLength,
                            UChar* dest, int32_t destCapacity,
                            UBool allowUnassigned,
                            UParseError* parseError,
                            UErrorCode& status ){
    // check error status
    if(U_FAILURE(status)){
        return 0;
    }

    //check arguments
    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
    UChar *b1 = b1Stack, *b2 = b2Stack;
    int32_t b1Len = 0, b2Len = 0;

    uint32_t result;
    int32_t b2Index = 0;
    int32_t chStart = 0;
    int8_t flag;
    int8_t length;
    int32_t index;
    UChar32 ch = 0;

    UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    UBool leftToRight=FALSE, rightToLeft=FALSE;
    int32_t rtlPos =-1, ltrPos =-1;

    // step 1: map, first into the stack buffer, then into a heap buffer of
    // the pre-flighted size if that was too small
    b1Len = map(src,srcLength, b1, MAX_STACK_BUFFER_SIZE, allowUnassigned, parseError, status);

    if(status == U_BUFFER_OVERFLOW_ERROR){
        b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
        if(b1==NULL){
            status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }

        status = U_ZERO_ERROR; // reset error

        b1Len = map(src,srcLength, b1, b1Len,allowUnassigned, parseError, status);
    }

    if(U_FAILURE(status)){
        goto CLEANUP;
    }

    // step 2: normalize b1 into b2, with the same grow-and-retry scheme
    b2Len = normalize(b1,b1Len, b2, MAX_STACK_BUFFER_SIZE, status);

    if(status == U_BUFFER_OVERFLOW_ERROR){
        b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
        if(b2==NULL){
            status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }

        status = U_ZERO_ERROR; // reset error

        // the source of the retry is still the mapped text in b1
        b2Len = normalize(b1,b1Len, b2,b2Len,status);
    }

    if(U_FAILURE(status)){
        goto CLEANUP;
    }

    // steps 3 and 4: one pass over the normalized text
    while(b2Index<b2Len){
        chStart = b2Index;
        ch = 0;

        U16_NEXT(b2, b2Index, b2Len, ch);

        UTRIE_GET16(&idnTrie,ch,result);

        getValues(result,flag,length,index);

        if(flag == UIDNA_PROHIBITED
            && isNotProhibited(ch) == FALSE){
            status = U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR;
            uprv_syntaxError(b2, chStart, b2Len, parseError);
            goto CLEANUP;
        }

        direction = u_charDirection(ch);
        if(firstCharDir == U_CHAR_DIRECTION_COUNT){
            firstCharDir = direction;
        }
        if(direction == U_LEFT_TO_RIGHT){
            leftToRight = TRUE;
            ltrPos = chStart;
        }
        if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
            rightToLeft = TRUE;
            rtlPos = chStart;
        }
    }

    // satisfy 2
    if( leftToRight == TRUE && rightToLeft == TRUE){
        status = U_IDNA_CHECK_BIDI_ERROR;
        uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
        goto CLEANUP;
    }

    //satisfy 3: after the loop, direction holds the class of the last character
    if( rightToLeft == TRUE &&
        !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
          (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
       ){
        status = U_IDNA_CHECK_BIDI_ERROR;
        uprv_syntaxError(b2, rtlPos, b2Len, parseError);
        goto CLEANUP;
    }

    if(b2Len <= destCapacity){
        uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
    }

CLEANUP:
    // free only what was taken from the heap
    if(b1!=b1Stack){
        uprv_free(b1);
    }
    if(b2!=b2Stack){
        uprv_free(b2);
    }
    return u_terminateUChars(dest, destCapacity, b2Len, &status);
}
// Reports whether the low three flag bits of ch's trie entry equal
// UIDNA_LABEL_SEPARATOR. Returns FALSE when status already holds a failure
// or when the data cannot be loaded (status is then set by isDataLoaded()).
UBool StringPrep::isLabelSeparator(UChar32 ch, UErrorCode& status){
    if(U_FAILURE(status) || !isDataLoaded(status)){
        return FALSE;
    }

    int32_t trieWord;
    UTRIE_GET16(&idnTrie,ch, trieWord);

    const int32_t flagBits = trieWord & 0x07;
    return (UBool)(flagBits == UIDNA_LABEL_SEPARATOR);
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_IDNA */

View file

@ -1,365 +0,0 @@
/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: strprep.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003feb1
* created by: Ram Viswanadha
*/
#ifndef STRPREP_H
#define STRPREP_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_IDNA
#include "unicode/uobject.h"
#include "unicode/uniset.h"
#include "unicode/parseerr.h"
U_NAMESPACE_BEGIN
/**\file
*
* This API implements the RFC 3454 StringPrep standard.
*
* The steps for preparing strings are:
*
* 1) Map -- For each character in the input, check if it has a mapping
* and, if so, replace it with its mapping.
* <ul>
* <li>Delete certain codepoints from the input because their
* presence or absence in the protocol identifiers should not
* make two strings different</li>
* <li>Case Mappings
* <br>If Normalization is turned off
* <br> Get mappings from case map tables
* <br>else
* <br> Get mappings from case map tables for normalization
* <br> Use u_getFC_NFKC_Closure for obtaining extra mappings
* </li>
* </ul>
* 2) Normalize -- Possibly normalize the result of step 1 using Unicode
* normalization NFKC.
*
* 3) Prohibit -- Check for any characters that are not allowed in the
* output. If any are found, return an error.
*
* 4) Check bidi -- Possibly check for right-to-left characters, and if
* any are found, make sure that the whole string satisfies the
* requirements for bidirectional strings. If the string does not
* satisfy the requirements for bidirectional strings, return an
* error.
*
* Some StringPrep profiles:
* IDN: "Nameprep" http://www.ietf.org/rfc/rfc3491.txt
* XMPP Node Identifiers: "Nodeprep" http://www.ietf.org/internet-drafts/draft-ietf-xmpp-nodeprep-01.txt
* XMPP Resource Identifiers: "Resourceprep" http://www.ietf.org/internet-drafts/draft-ietf-xmpp-resourceprep-01.txt
* ANONYMOUS SASL tokens: "plain" http://www.ietf.org/internet-drafts/draft-ietf-sasl-anon-00.txt
* iSCSI http://www.ietf.org/internet-drafts/draft-ietf-ips-iscsi-string-prep-03.txt
*/
class StringPrep : public UObject{
protected:
UVersionInfo unicodeVersion; /** The Character repertoire version of this profile */
UBool bidiCheck; /** Option to turn BiDi checking on */
UBool doNFKC; /** Option to turn NFKC on */
/**
* Protected default constructor sub classes
*/
StringPrep(){};
public:
/**
* Destructor
*/
virtual inline ~StringPrep(){};
/**
* Map every character in input stream with mapping character
* in the mapping table and populate the output stream.
* For any individual character the mapping table may specify
* that that a character be mapped to nothing, mapped to one
* other character or to a string of other characters.
*
* @param src Pointer to UChar buffer containing a single label
* @param srcLength Number of characters in the source label
* @param dest Pointer to the destination buffer to receive the output
* @param destCapacity The capacity of destination array
* @param allowUnassigned Unassigned values can be converted to ASCII for query operations
* If TRUE unassigned values are treated as normal Unicode code point.
* If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
* @param status ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return The number of UChars in the destination buffer
*
*/
virtual int32_t map(const UChar* src, int32_t srcLength,
UChar* dest, int32_t destCapacity,
UBool allowUnassigned,
UParseError* parseError,
UErrorCode& status );
/**
* Normalize the input stream using Normalization Form KC (NFKC)
*
* @param src Pointer to UChar buffer containing a single label
* @param srcLength Number of characters in the source label
* @param dest Pointer to the destination buffer to receive the output
* @param destCapacity The capacity of destination array
* @param status ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return The number of UChars in the destination buffer
*
*
*/
virtual int32_t normalize( const UChar* src, int32_t srcLength,
UChar* dest, int32_t destCapacity,
UErrorCode& status );
/**
* Prepare the input stream with for use. This operation maps, normalizes(NFKC),
* checks for prohited and BiDi characters in the order defined by RFC 3454
*
* @param src Pointer to UChar buffer containing a single label
* @param srcLength Number of characters in the source label
* @param dest Pointer to the destination buffer to receive the output
* @param destCapacity The capacity of destination array
* @param allowUnassigned Unassigned values can be converted to ASCII for query operations
* If TRUE unassigned values are treated as normal Unicode code point.
* If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code.
* @param status ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return The number of UChars in the destination buffer
*
*
*/
virtual int32_t process(const UChar* src, int32_t srcLength,
UChar* dest, int32_t destCapacity,
UBool allowUnassigned,
UParseError* parseError,
UErrorCode& status );
/**
* Create a profile from prebuilt default Nameprep profile conforming to
* nameprep internet draft (http://www.ietf.org/html.charters/idn-charter.html).
* This is a built-in/unmodifiable profile.
*
* @param status ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return Pointer to StringPrep object that is created. Should be deleted by
* by caller
*
*
*/
static StringPrep* createNameprepInstance(UErrorCode& status);
/**
* Create a profile from prebuilt default StringPrep profile conforming to
* RFC 3454 (ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt).
* User defined profiles can be created by getting the default profile and
* adding mappings, removing mappings, turning options ON/OFF and prohibiting
* characters from the output.
*
* @param status ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
* @return Pointer to StringPrep object that is created. Should be deleted by
* the caller.
*
*
*/
static StringPrep* createDefaultInstance(UErrorCode& status);
/**
* Ascertain if the given code point is a Letter/Digit/Hyphen in the ASCII range
*
* @return TRUE is the code point is a Letter/Digit/Hyphen
*
*
*/
static inline UBool isLDHChar(UChar32 ch);
/**
* Ascertain if the given code point is a label separator as specified by IDNA
*
* @return TRUE is the code point is a label separator
*
*
*/
virtual UBool isLabelSeparator(UChar32 ch, UErrorCode& status);
/**
* Get the BiDi option of this profile
*
*
*/
inline UBool getCheckBiDi();
/**
* Get the normalization (NFKC) option of this profile
*
* @return The normalization option
*
*
*/
inline UBool getNormalization();
/**
* Get the Unicode version which this profile
* conforms to
*
*
*/
inline void getUnicodeVersion(UVersionInfo& info);
private:
// Boiler plate
/**
* Copy constructor.
* Declared in the private section with an empty body: it copies no state
* from its argument.
* NOTE(review): presumably private to disallow copying of StringPrep
* objects from outside the class -- confirm intent.
*
*/
StringPrep(const StringPrep&){};
/**
* Assignment operator.
* Declared in the private section; it ignores its argument, copies nothing
* and simply returns a reference to this object.
*
* @param other the object to assign from (unused).
* @return a reference to this object, unchanged.
*
*/
StringPrep& operator=(const StringPrep& other) {return *this;};
/**
* Equality operator.
* Declared in the private section. This stub performs no comparison:
* it ignores its argument and always returns FALSE.
*
* @param other the object to be compared with (unused).
* @return always FALSE.
*
*/
UBool operator==(const StringPrep& other) const {return FALSE;};
/**
* Inequality operator.
* Declared in the private section. Defined as the negation of operator==;
* since operator== always returns FALSE, this evaluates to true for any
* argument.
*
* @param other the object to be compared with.
* @return the logical negation of operator==(other).
*
*/
UBool operator!=(const StringPrep& other) const { return !operator==(other); }
public:
/**
* ICU "poor man's RTTI", returns a UClassID for this class.
*
*
*/
static inline UClassID getStaticClassID();
/**
* ICU "poor man's RTTI", returns a UClassID for the actual class.
*
*
*/
virtual inline UClassID getDynamicClassID() const;
protected:
/**
* Sub classes that slightly modify the default profile
* implement this method to remove characters to
* the prohibited list. The default implementation does not
* check if the data is loaded or not. The caller is responsible
* for checking for data.
*
*/
virtual UBool isNotProhibited(UChar32 ch);
/**
* Sub classes that slightly modify the default profile
* implement this method to remove characters to
* the unassigned list. The default implementation does not
* check if the data is loaded or not. The caller is responsible
* for checking for data.
*/
virtual UBool isUnassigned(UChar32 ch);
/**
* Ascertains if uidna.icu data file is loaded.
* If data is not loaded, loads the data file.
*
*
*/
static UBool isDataLoaded(UErrorCode& status);
private:
/**
* The address of this static class variable serves as this class's ID
* for ICU "poor man's RTTI".
*/
static const char fgClassID;
};
/* Returns the value of the bidiCheck member (the BiDi option of this profile). */
inline UBool StringPrep::getCheckBiDi(){
return bidiCheck;
}
/* Returns the value of the doNFKC member (the NFKC normalization option of this profile). */
inline UBool StringPrep::getNormalization(){
return doNFKC;
}
/*
 * Copies this profile's unicodeVersion member, field by field, into the
 * caller-supplied version array.
 *
 * @param info receives the version fields; every element is overwritten.
 */
inline void StringPrep::getUnicodeVersion(UVersionInfo& info){
    // info is a reference to an array, so sizeof(info) is the full array size
    const int32_t fieldCount = (int32_t)(sizeof(info)/sizeof(info[0]));
    int32_t field = 0;
    while(field < fieldCount){
        info[field] = unicodeVersion[field];
        ++field;
    }
}
/* Returns the address of the static member fgClassID, cast to UClassID. */
inline UClassID StringPrep::getStaticClassID() {
return (UClassID)&fgClassID;
}
/* Returns the same ID as getStaticClassID(); declared virtual in the class. */
inline UClassID StringPrep::getDynamicClassID() const {
return getStaticClassID();
}
/*
 * Tests whether a code point belongs to the ASCII Letter/Digit/Hyphen set
 * [\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A].
 *
 * @param ch the code point to test
 * @return TRUE for '-', '0'..'9', 'A'..'Z' and 'a'..'z'; FALSE otherwise
 */
inline UBool StringPrep::isLDHChar(UChar32 ch){
    // hyphen
    if(ch == 0x002D){
        return TRUE;
    }
    // digits
    if(ch >= 0x0030 && ch <= 0x0039){
        return TRUE;
    }
    // upper case letters
    if(ch >= 0x0041 && ch <= 0x005A){
        return TRUE;
    }
    // lower case letters
    if(ch >= 0x0061 && ch <= 0x007A){
        return TRUE;
    }
    // everything else, including all code points above 0x007A
    return FALSE;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_IDNA */
#endif
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/

View file

@ -58,7 +58,7 @@ u_cleanup(void)
}
#if !UCONFIG_NO_IDNA
ustrprep_cleanup();
usprep_cleanup();
#endif
#if !UCONFIG_NO_BREAK_ITERATION
breakiterator_cleanup();

View file

@ -35,7 +35,7 @@ U_CFUNC UBool uloc_cleanup(void);
U_CFUNC UBool breakiterator_cleanup(void);
U_CFUNC UBool ustrprep_cleanup(void);
U_CFUNC UBool usprep_cleanup(void);
U_CFUNC UBool U_EXPORT2 ucnv_cleanup(void);

View file

@ -20,7 +20,7 @@
#include "unicode/uidna.h"
#include "unicode/ustring.h"
#include "strprep.h"
#include "unicode/usprep.h"
#include "punycode.h"
#include "ustr_imp.h"
#include "cmemory.h"
@ -40,6 +40,7 @@ static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
#define CAPITAL_Z 0x005A
#define LOWER_CASE_DELTA 0x0020
#define FULL_STOP 0x002E
#define DATA_FILE_NAME "uidna"
inline static UChar
toASCIILower(UChar ch){
@ -119,21 +120,70 @@ compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
return lengthResult;
}
/*
 * Tests whether a code point belongs to the ASCII Letter/Digit/Hyphen set
 * [\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A].
 *
 * @param ch the code point to test
 * @return TRUE for '-', '0'..'9', 'A'..'Z' and 'a'..'z'; FALSE otherwise
 */
static inline UBool
isLDHChar(UChar32 ch){
    const UBool isHyphen    = (UBool)(ch == 0x002D);
    const UBool isDigit     = (UBool)(0x0030 <= ch && ch <= 0x0039);
    const UBool isUpperCase = (UBool)(0x0041 <= ch && ch <= 0x005A);
    const UBool isLowerCase = (UBool)(0x0061 <= ch && ch <= 0x007A);

    if(isHyphen || isDigit || isUpperCase || isLowerCase){
        return TRUE;
    }
    return FALSE;
}
U_CAPI int32_t U_EXPORT2
uidna_toASCII(const UChar* src, int32_t srcLength,
UChar* dest, int32_t destCapacity,
int32_t options,
UParseError* parseError,
UErrorCode* status){
if(status == NULL || U_FAILURE(*status)){
return 0;
}
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
// Scans src for the end of the current label.
//
// Returns the length of the label, excluding the separator.
// On return *limit points just past the separator when one was found,
// otherwise at the end of the input (the terminating NUL when
// srcLength == -1, or src+srcLength). *done is set to TRUE only when the
// end of the input was reached; it is left untouched when a separator
// is found.
static inline int32_t
getNextSeparator(UChar *src,int32_t srcLength,UStringPrepProfile* nameprep,
                 UChar **limit,
                 UBool *done,
                 UErrorCode *status){
    int32_t pos = 0;

    if(srcLength != -1){
        // explicit length: look at no more than srcLength code units
        while(pos < srcLength){
            if(usprep_isLabelSeparator(nameprep, src[pos], status)){
                *limit = src + (pos+1); // go past the delimiter
                return pos;
            }
            ++pos;
        }
        // no delimiter in the given range
        *limit = src+srcLength;
        *done = TRUE;
        return pos;
    }

    // NUL-terminated input: stop at the terminator or the first delimiter
    while(src[pos] != 0){
        if(usprep_isLabelSeparator(nameprep, src[pos], status)){
            *limit = src + (pos+1); // go past the delimiter
            return pos;
        }
        ++pos;
    }
    *limit = src + pos; // point to null
    *done = TRUE;
    return pos;
}
static int32_t
_internal_toASCII(const UChar* src, int32_t srcLength,
UChar* dest, int32_t destCapacity,
int32_t options,
UStringPrepProfile* nameprep,
UParseError* parseError,
UErrorCode* status){
UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
//initialize pointers to stack buffers
UChar *b1 = b1Stack, *b2 = b2Stack;
@ -142,7 +192,7 @@ uidna_toASCII(const UChar* src, int32_t srcLength,
b2Capacity = MAX_LABEL_BUFFER_SIZE ,
reqLength=0;
int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
UBool* caseFlags = NULL;
// the source contains all ascii codepoints
@ -153,18 +203,12 @@ uidna_toASCII(const UChar* src, int32_t srcLength,
int32_t j=0;
//get the options
UBool allowUnassigned = (UBool)((options & UIDNA_ALLOW_UNASSIGNED) != 0);
UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
int32_t failPos = -1;
// step 2
StringPrep* prep = StringPrep::createNameprepInstance(*status);
if(U_FAILURE(*status)){
goto CLEANUP;
}
b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned, parseError, *status);
int32_t failPos = -1;
// step 2
b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
if(*status == U_BUFFER_OVERFLOW_ERROR){
// redo processing of string
@ -177,7 +221,7 @@ uidna_toASCII(const UChar* src, int32_t srcLength,
*status = U_ZERO_ERROR; // reset error
b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
}
// error bail out
if(U_FAILURE(*status)){
@ -192,7 +236,7 @@ uidna_toASCII(const UChar* src, int32_t srcLength,
// here we do not assemble surrogates
// since we know that LDH code points
// are in the ASCII range only
if(prep->isLDHChar(b1[j])==FALSE){
if(isLDHChar(b1[j])==FALSE){
srcIsLDH = FALSE;
failPos = j;
}
@ -292,30 +336,20 @@ CLEANUP:
}
uprv_free(caseFlags);
delete prep;
return u_terminateUChars(dest, destCapacity, reqLength, status);
}
U_CAPI int32_t U_EXPORT2
uidna_toUnicode(const UChar* src, int32_t srcLength,
UChar* dest, int32_t destCapacity,
int32_t options,
UParseError* parseError,
UErrorCode* status){
if(status == NULL || U_FAILURE(*status)){
return 0;
}
if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
static int32_t
_internal_toUnicode(const UChar* src, int32_t srcLength,
UChar* dest, int32_t destCapacity,
int32_t options,
UStringPrepProfile* nameprep,
UParseError* parseError,
UErrorCode* status){
//get the options
UBool allowUnassigned = (UBool)((options & UIDNA_ALLOW_UNASSIGNED) != 0);
UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
@ -326,8 +360,7 @@ uidna_toUnicode(const UChar* src, int32_t srcLength,
b2Capacity = MAX_LABEL_BUFFER_SIZE,
b3Capacity = MAX_LABEL_BUFFER_SIZE,
reqLength=0;
StringPrep* prep = StringPrep::createNameprepInstance(*status);
b1Len = 0;
UBool* caseFlags = NULL;
@ -335,10 +368,6 @@ uidna_toUnicode(const UChar* src, int32_t srcLength,
UBool srcIsLDH = TRUE;
int32_t failPos =0;
if(U_FAILURE(*status)){
goto CLEANUP;
}
// step 1: find out if all the codepoints in src are ASCII
if(srcLength==-1){
srcLength = 0;
@ -349,7 +378,7 @@ uidna_toUnicode(const UChar* src, int32_t srcLength,
// here we do not assemble surrogates
// since we know that LDH code points
// are in the ASCII range only
if(prep->isLDHChar(src[srcLength])==FALSE){
if(isLDHChar(src[srcLength])==FALSE){
srcIsLDH = FALSE;
failPos = srcLength;
}
@ -363,7 +392,7 @@ uidna_toUnicode(const UChar* src, int32_t srcLength,
// here we do not assemble surrogates
// since we know that LDH code points
// are in the ASCII range only
if(prep->isLDHChar(src[j])==FALSE){
if(isLDHChar(src[j])==FALSE){
srcIsLDH = FALSE;
failPos = j;
}
@ -372,7 +401,7 @@ uidna_toUnicode(const UChar* src, int32_t srcLength,
if(srcIsASCII == FALSE){
// step 2: process the string
b1Len = prep->process(src,srcLength,b1,b1Capacity,allowUnassigned, parseError, *status);
b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
if(*status == U_BUFFER_OVERFLOW_ERROR){
// redo processing of string
/* we do not have enough room so grow the buffer*/
@ -384,7 +413,7 @@ uidna_toUnicode(const UChar* src, int32_t srcLength,
*status = U_ZERO_ERROR; // reset error
b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
}
//bail out on error
if(U_FAILURE(*status)){
@ -495,8 +524,7 @@ CLEANUP:
}
uprv_free(caseFlags);
delete prep;
// The RFC states that
// <quote>
// ToUnicode never fails. If any step fails, then the original input
@ -518,45 +546,64 @@ CLEANUP:
return u_terminateUChars(dest, destCapacity, reqLength, status);
}
// returns the length of the label excluding the separator
// if *limit == separator then the length returned does not include
// the separtor.
static int32_t
getNextSeparator(UChar *src,int32_t srcLength,StringPrep* prep,
UChar **limit,
UBool *done,
UErrorCode *status){
if(srcLength == -1){
int32_t i;
for(i=0 ; ;i++){
if(src[i] == 0){
*limit = src + i; // point to null
*done = TRUE;
return i;
}
if(prep->isLabelSeparator(src[i],*status)){
*limit = src + (i+1); // go past the delimiter
return i;
}
}
}else{
int32_t i;
for(i=0;i<srcLength;i++){
if(prep->isLabelSeparator(src[i],*status)){
*limit = src + (i+1); // go past the delimiter
return i;
}
}
// we have not found the delimiter
// if(i==srcLength)
*limit = src+srcLength;
*done = TRUE;
return i;
/*
 * Public entry point for the ToASCII operation.
 * Validates the arguments, opens the StringPrep profile named by
 * DATA_FILE_NAME from the default ICU data (path NULL), delegates the work
 * to _internal_toASCII() and closes the profile again.
 *
 * Returns 0 if status is NULL or already a failure, or if the arguments are
 * illegal (in which case *status is set to U_ILLEGAL_ARGUMENT_ERROR);
 * returns -1 if the profile cannot be opened; otherwise returns the value
 * produced by _internal_toASCII().
 * NOTE(review): the -1 return on profile-open failure differs from the 0
 * returned on the other early-exit paths -- confirm this is intended.
 */
U_CAPI int32_t U_EXPORT2
uidna_toASCII(const UChar* src, int32_t srcLength,
UChar* dest, int32_t destCapacity,
int32_t options,
UParseError* parseError,
UErrorCode* status){
/* nothing to do if the caller did not supply a usable status */
if(status == NULL || U_FAILURE(*status)){
return 0;
}
/* srcLength of -1 is accepted; a NULL dest is accepted only with zero capacity */
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
/* open the profile used for this single call */
UStringPrepProfile* nameprep = usprep_open(NULL,DATA_FILE_NAME, status);
if(U_FAILURE(*status)){
return -1;
}
int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
/* close the profile */
usprep_close(nameprep);
return retLen;
}
/*
 * Public entry point for the ToUnicode operation.
 * Validates the arguments, opens the StringPrep profile named by
 * DATA_FILE_NAME from the default ICU data (path NULL), delegates the work
 * to _internal_toUnicode() and closes the profile again.
 *
 * Returns 0 if status is NULL or already a failure, or if the arguments are
 * illegal (in which case *status is set to U_ILLEGAL_ARGUMENT_ERROR);
 * returns -1 if the profile cannot be opened; otherwise returns the value
 * produced by _internal_toUnicode().
 * NOTE(review): the -1 return on profile-open failure differs from the 0
 * returned on the other early-exit paths -- confirm this is intended.
 */
U_CAPI int32_t U_EXPORT2
uidna_toUnicode(const UChar* src, int32_t srcLength,
UChar* dest, int32_t destCapacity,
int32_t options,
UParseError* parseError,
UErrorCode* status){
/* nothing to do if the caller did not supply a usable status */
if(status == NULL || U_FAILURE(*status)){
return 0;
}
/* srcLength of -1 is accepted; a NULL dest is accepted only with zero capacity */
if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
/* open the profile used for this single call */
UStringPrepProfile* nameprep = usprep_open(NULL, DATA_FILE_NAME, status);
if(U_FAILURE(*status)){
return -1;
}
int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
/* close the profile */
usprep_close(nameprep);
return retLen;
}
U_CAPI int32_t U_EXPORT2
uidna_IDNToASCII( const UChar *src, int32_t srcLength,
UChar* dest, int32_t destCapacity,
@ -574,7 +621,7 @@ uidna_IDNToASCII( const UChar *src, int32_t srcLength,
int32_t reqLength = 0;
StringPrep* prep = StringPrep::createNameprepInstance(*status);
UStringPrepProfile* nameprep = usprep_open(NULL, DATA_FILE_NAME, status);
if(U_FAILURE(*status)){
return 0;
@ -592,11 +639,12 @@ uidna_IDNToASCII( const UChar *src, int32_t srcLength,
for(;;){
labelLen = getNextSeparator(labelStart,remainingLen, prep, &delimiter,&done, status);
labelLen = getNextSeparator(labelStart,remainingLen, nameprep, &delimiter,&done, status);
labelReqLength = uidna_toASCII( labelStart, labelLen,
currentDest, remainingDestCapacity,
options, parseError, status);
labelReqLength = _internal_toASCII( labelStart, labelLen,
currentDest, remainingDestCapacity,
options, nameprep,
parseError, status);
if(*status == U_BUFFER_OVERFLOW_ERROR){
@ -636,7 +684,7 @@ uidna_IDNToASCII( const UChar *src, int32_t srcLength,
}
delete prep;
usprep_close(nameprep);
return u_terminateUChars(dest, destCapacity, reqLength, status);
}
@ -658,7 +706,7 @@ uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
int32_t reqLength = 0;
StringPrep* prep = StringPrep::createNameprepInstance(*status);
UStringPrepProfile* nameprep = usprep_open(NULL, DATA_FILE_NAME, status);
if(U_FAILURE(*status)){
return 0;
@ -676,11 +724,12 @@ uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
for(;;){
labelLen = getNextSeparator(labelStart,remainingLen, prep, &delimiter,&done, status);
labelLen = getNextSeparator(labelStart,remainingLen, nameprep, &delimiter,&done, status);
labelReqLength = uidna_toUnicode(labelStart, labelLen,
currentDest, remainingDestCapacity,
options, parseError, status);
labelReqLength = _internal_toUnicode(labelStart, labelLen,
currentDest, remainingDestCapacity,
options, nameprep,
parseError, status);
if(*status == U_BUFFER_OVERFLOW_ERROR){
@ -721,7 +770,7 @@ uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
}
delete prep;
usprep_close(nameprep);
return u_terminateUChars(dest, destCapacity, reqLength, status);
}

View file

@ -44,7 +44,7 @@
* once.
* ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
* ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string).
*\end_file
*
*/
/**
@ -84,18 +84,18 @@
* @param destCapacity Size of dest.
* @param options A bit set of options:
*
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* U_UNASSIGNED_CODE_POINT_FOUND error code.
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* U_UNASSIGNED_ERROR error code.
*
* - UIDNA_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points are in the input
* are treated as normal Unicode code points.
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points are in the input
* are treated as normal Unicode code points.
*
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
*
* @param parseError Pointer to UParseError struct to receive information on position
* of error if an error is encountered. Can be NULL.
@ -129,23 +129,23 @@ uidna_toASCII(const UChar* src, int32_t srcLength,
* @param destCapacity Size of dest.
* @param options A bit set of options:
*
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* U_UNASSIGNED_CODE_POINT_FOUND error code.
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* U_UNASSIGNED_ERROR error code.
*
* - UIDNA_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points are in the input
* are treated as normal Unicode code points. <b> Note: </b> This option is
* required on toUnicode operation because the RFC mandates
* verification of decoded ACE input by applying toASCII and comparing
* its output with source
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points are in the input
* are treated as normal Unicode code points. <b> Note: </b> This option is
* required on toUnicode operation because the RFC mandates
* verification of decoded ACE input by applying toASCII and comparing
* its output with source
*
*
*
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
*
* @param parseError Pointer to UParseError struct to receive information on position
* of error if an error is encountered. Can be NULL.
@ -184,18 +184,18 @@ uidna_toUnicode(const UChar* src, int32_t srcLength,
* @param destCapacity Size of dest.
* @param options A bit set of options:
*
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* U_UNASSIGNED_CODE_POINT_FOUND error code.
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* U_UNASSIGNED_CODE_POINT_FOUND error code.
*
* - UIDNA_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points are in the input
* are treated as normal Unicode code points.
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points are in the input
* are treated as normal Unicode code points.
*
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
*
* @param parseError Pointer to UParseError struct to receive information on position
* of error if an error is encountered. Can be NULL.
@ -230,18 +230,18 @@ uidna_IDNToASCII( const UChar* src, int32_t srcLength,
* @param destCapacity Size of dest.
* @param options A bit set of options:
*
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* U_UNASSIGNED_CODE_POINT_FOUND error code.
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* U_UNASSIGNED_CODE_POINT_FOUND error code.
*
* - UIDNA_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points are in the input
* are treated as normal Unicode code points.
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points are in the input
* are treated as normal Unicode code points.
*
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
*
* @param parseError Pointer to UParseError struct to receive information on position
* of error if an error is encountered. Can be NULL.
@ -277,18 +277,18 @@ uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
* @param length2 Length of second source string, or -1 if NUL-terminated.
* @param options A bit set of options:
*
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* U_UNASSIGNED_CODE_POINT_FOUND error code.
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
* If unassigned code points are found the operation fails with
* U_UNASSIGNED_CODE_POINT_FOUND error code.
*
* - UIDNA_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points are in the input
* are treated as normal Unicode code points.
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, the unassigned code points are in the input
* are treated as normal Unicode code points.
*
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
* If this option is set and the input does not satisfy STD3 rules,
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
*
* @param status ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.

View file

@ -0,0 +1,121 @@
/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: usprep.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003jul2
* created by: Ram Viswanadha
*/
#ifndef __USPREP_H__
#define __USPREP_H__
#include "unicode/utypes.h"
#if !UCONFIG_NO_IDNA
#include "unicode/parseerr.h"
typedef struct UStringPrepProfile UStringPrepProfile;
/**
* Option to prohibit processing of unassigned codepoints in the input
*
* @see usprep_prepare
* @draft ICU 2.8
*/
#define USPREP_NONE 0x0000
/**
* Option to allow processing of unassigned codepoints in the input
*
* @see usprep_prepare
* @draft ICU 2.8
*/
#define USPREP_ALLOW_UNASSIGNED 0x0001
/**
* Creates a StringPrep profile from the data file.
*
* @param path string containing the full path pointing to the directory
* where the resources reside followed by the package name
* e.g. "/usr/resource/my_app/resources/guimessages" on a Unix system.
* if NULL, ICU default data files will be used.
* @param fileName name of the profile file to be opened
* @param status ICU error code in/out parameter. Must not be NULL.
* Must fulfill U_SUCCESS before the function call.
* @return Pointer to UStringPrepProfile that is opened. Should be closed by
* calling usprep_close()
* @see usprep_close()
* @draft ICU 2.8
*/
U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char* path,
const char* fileName,
UErrorCode* status);
/**
* Closes the profile
* @param profile The profile to close
* @draft ICU 2.8
*/
U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile* profile);
/**
* Prepare the input stream for use. This operation maps, normalizes(NFKC),
* checks for prohibited and BiDi characters in the order defined by RFC 3454
* depending on the options specified
*
* @param prep The profile to use
* @param src Pointer to UChar buffer containing the string to prepare
* @param srcLength Number of characters in the source string
* @param dest Pointer to the destination buffer to receive the output
* @param destCapacity The capacity of destination array
* @param options A bit set of options:
*
* - USPREP_NONE Use default options, i.e., do not process unassigned code points.
* If unassigned code points are found the operation fails with
* U_STRINGPREP_UNASSIGNED_ERROR error code.
*
* - USPREP_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
* If this option is set, unassigned code points in the input
* are treated as normal Unicode code points.
* @param parseError Pointer to UParseError struct to receive information on position
* of error if an error is encountered. Can be NULL.
* @param status ICU in/out error code parameter.
* U_INVALID_CHAR_FOUND if src contains
* unmatched single surrogates.
* U_INDEX_OUTOFBOUNDS_ERROR if src contains
* too many code points.
* U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
* @return The number of UChars in the destination buffer
* @draft ICU 2.8
*/
U_CAPI int32_t U_EXPORT2
usprep_prepare( const UStringPrepProfile* prep,
const UChar* src, int32_t srcLength,
UChar* dest, int32_t destCapacity,
int32_t options,
UParseError* parseError,
UErrorCode* status );
#endif /* #if !UCONFIG_NO_IDNA */
#endif

View file

@ -650,16 +650,23 @@ typedef enum UErrorCode {
* The error code in the range 0x10400-0x104ff are reserved for IDNA related error codes
*/
U_IDNA_ERROR_START=0x10400,
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR,
U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR,
U_IDNA_PROHIBITED_ERROR,
U_IDNA_UNASSIGNED_ERROR,
U_IDNA_CHECK_BIDI_ERROR,
U_IDNA_STD3_ASCII_RULES_ERROR,
U_IDNA_ACE_PREFIX_ERROR,
U_IDNA_VERIFICATION_ERROR,
U_IDNA_LABEL_TOO_LONG_ERROR,
U_IDNA_ERROR_LIMIT,
/*
* Aliases for StringPrep
*/
U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
U_ERROR_LIMIT=U_IDNA_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
U_ERROR_LIMIT=U_IDNA_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
} UErrorCode;
/* Use the following to determine if an UErrorCode represents */

View file

@ -26,6 +26,9 @@
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
#ifdef DEBUG
#include <stdio.h>
#endif
/**
* Unicode property names and property value names are compared
* "loosely". Property[Value]Aliases.txt say:
@ -429,6 +432,20 @@ u_getIntPropertyMaxValue(UProperty which) {
* Do not use a UnicodeSet pattern because that causes infinite recursion;
* UnicodeSet depends on the inclusions set.
*/
#ifdef DEBUG
/*
 * Debug helper: finds the last occurrence of a character in a buffer.
 *
 * @param source    buffer to search; only the first sourceLen chars are read
 * @param sourceLen number of chars to examine; may be 0
 * @param find      character to look for
 * @return index of the last occurrence of find within the examined range,
 *         or (uint32_t)-1 if it does not occur (including sourceLen == 0)
 */
static uint32_t
strrch(const char* source,uint32_t sourceLen,char find){
    /* Walk backwards by index so that no pointer outside the buffer is ever
     * formed; this also makes an empty range safe. */
    uint32_t pos = sourceLen;
    while(pos > 0){
        --pos;
        if(source[pos]==find){
            return pos;
        }
    }
    return (uint32_t)-1;
}
#endif
U_CAPI void U_EXPORT2
uprv_getInclusions(USet* set, UErrorCode *pErrorCode) {
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
@ -441,4 +458,64 @@ uprv_getInclusions(USet* set, UErrorCode *pErrorCode) {
unorm_addPropertyStarts(set, pErrorCode);
#endif
uchar_addPropertyStarts(set, pErrorCode);
#ifdef DEBUG
{
    /*
     * Debug aid: convert the inclusions set to its pattern string and write
     * it to stdout as a sequence of quoted fragments joined by " +\n",
     * taking care not to cut a backslash escape in half.
     */
    UChar* result=NULL;
    int32_t resultCapacity=0;
    /* preflight call: with zero capacity this reports the required length */
    int32_t bufLen = uset_toPattern(set,result,resultCapacity,TRUE,pErrorCode);
    char* resultChars = NULL;
    if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR){
        uint32_t len = 0, add=0;
        char *buf=NULL, *current = NULL;
        *pErrorCode = U_ZERO_ERROR;
        resultCapacity = bufLen;
        result = (UChar*) uprv_malloc(resultCapacity * U_SIZEOF_UCHAR);
        /* room for bufLen chars plus the terminating NUL written below */
        resultChars = (char*) uprv_malloc(bufLen+1);
        /* best effort: skip the dump if either buffer could not be allocated */
        if(result != NULL && resultChars != NULL){
            bufLen = uset_toPattern(set,result,resultCapacity,TRUE,pErrorCode);
        }
        if(result != NULL && resultChars != NULL && U_SUCCESS(*pErrorCode)){
            u_UCharsToChars(result,resultChars,bufLen);
            resultChars[bufLen] = 0;
            buf = resultChars;
            /*printf(resultChars);*/
            while(len < bufLen){
                add = 70-5/* for ", +\n */;
                current = buf +len;
                if (add < (bufLen-len)) {
                    /* look for the last backslash inside the candidate fragment */
                    uint32_t index = strrch(current,add,'\\');
                    if (index > add) {
                        /* no backslash found: cut at the nominal width */
                        index = add;
                    } else {
                        int32_t num =index-1;
                        uint32_t seqLen;
                        /* skip back over a run of consecutive backslashes */
                        while(num>0){
                            if(current[num]=='\\'){
                                num--;
                            }else{
                                break;
                            }
                        }
                        /* use the parity of the run length to decide whether
                         * the escape starts one char earlier */
                        if ((index-num)%2==0) {
                            index--;
                        }
                        /* a \u escape is 6 chars long, any other escape 2 */
                        seqLen = (current[index+1]=='u') ? 6 : 2;
                        /* extend the fragment so the escape is not split */
                        if ((add-index) < seqLen) {
                            add = index + seqLen;
                        }
                    }
                }
                fwrite("\"",1,1,stdout);
                if(len+add<bufLen){
                    fwrite(current,1,add,stdout);
                    fwrite("\" +\n",1,4,stdout);
                }else{
                    /* last fragment: write whatever is left */
                    fwrite(current,1,bufLen-len,stdout);
                }
                len+=add;
            }
        }
    }
    uprv_free(result);
    uprv_free(resultChars);
}
#endif
}

View file

@ -0,0 +1,808 @@
/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: usprep.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003jul2
* created by: Ram Viswanadha
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_IDNA
#include "unicode/usprep.h"
#include "unicode/unorm.h"
#include "unicode/ustring.h"
#include "unicode/uchar.h"
#include "unicode/uversion.h"
#include "umutex.h"
#include "cmemory.h"
#include "sprpimpl.h"
#include "ustr_imp.h"
#include "uhash.h"
#include "cstring.h"
U_CDECL_BEGIN
/*
Static cache for already opened StringPrep profiles
*/
static UHashtable *SHARED_DATA_HASHTABLE = NULL;
static UMTX usprepMutex = NULL;
/*
 * Data-acceptance callback passed to udata_openChoice(): decides from the
 * data header whether the item is usable StringPrep data.
 *
 * @param pInfo header information of the candidate data item
 * @return TRUE if size, endianness, charset family, the "SPRP" data format
 *         and the format version fields all match; FALSE otherwise
 */
static UBool U_CALLCONV
isAcceptable(void * /* context */,
             const char * /* type */,
             const char * /* name */,
             const UDataInfo *pInfo) {
    /* header must be at least 20 bytes */
    if(pInfo->size<20) {
        return FALSE;
    }
    /* data must have been built for this platform's endianness and charset */
    if(pInfo->isBigEndian!=U_IS_BIG_ENDIAN || pInfo->charsetFamily!=U_CHARSET_FAMILY) {
        return FALSE;
    }
    /* dataFormat="SPRP" */
    if(pInfo->dataFormat[0]!=0x53 ||
       pInfo->dataFormat[1]!=0x50 ||
       pInfo->dataFormat[2]!=0x52 ||
       pInfo->dataFormat[3]!=0x50) {
        return FALSE;
    }
    /* format version 3, built with the trie shift values compiled into this code */
    if(pInfo->formatVersion[0]!=3 ||
       pInfo->formatVersion[2]!=UTRIE_SHIFT ||
       pInfo->formatVersion[3]!=UTRIE_INDEX_SHIFT) {
        return FALSE;
    }
    return TRUE;
}
/*
 * Folding-offset callback installed on the StringPrep trie (assigned to
 * UTrie.getFoldingOffset in loadData). Returns the trie data value itself,
 * converted to int32_t, as the offset.
 */
static int32_t U_CALLCONV
getFoldingOffset(uint32_t data) {
return (int32_t)data;
}
U_CDECL_END
/*
 * Hash function for the profile cache: combines the character-string hashes
 * of the key's name and path as hash(name) + 37*hash(path).
 *
 * @param parm hash token whose pointer refers to a UStringPrepKey
 */
static int32_t U_EXPORT2 U_CALLCONV
hashEntry(const UHashTok parm) {
    UStringPrepKey *key = (UStringPrepKey *)parm.pointer;
    UHashTok tok;

    tok.pointer = key->name;
    const int32_t nameHash = uhash_hashChars(tok);

    tok.pointer = key->path;
    const int32_t pathHash = uhash_hashChars(tok);

    return nameHash+37*pathHash;
}
/*
 * Key comparator for the profile cache: two keys compare equal when both
 * their names and their paths compare equal as character strings.
 *
 * @param p1 hash token whose pointer refers to the first UStringPrepKey
 * @param p2 hash token whose pointer refers to the second UStringPrepKey
 */
static UBool U_EXPORT2 U_CALLCONV
compareEntries(const UHashTok p1, const UHashTok p2) {
    UStringPrepKey *lhs = (UStringPrepKey *)p1.pointer;
    UStringPrepKey *rhs = (UStringPrepKey *)p2.pointer;
    UHashTok lhsTok, rhsTok;

    lhsTok.pointer = lhs->name;
    rhsTok.pointer = rhs->name;
    const UBool sameName = uhash_compareChars(lhsTok, rhsTok);

    lhsTok.pointer = lhs->path;
    rhsTok.pointer = rhs->path;
    const UBool samePath = uhash_compareChars(lhsTok, rhsTok);

    return ((UBool)(sameName & samePath));
}
/*
 * Initializes the mutex (usprepMutex) that guards the StringPrep profile
 * cache and profile data loading.
 *
 * @param status not read or written by this function.
 */
U_CFUNC void
usprep_init(UErrorCode *status) {
umtx_init(&usprepMutex);
}
/**
 * Creates the shared profile cache (SHARED_DATA_HASHTABLE) if it does not
 * exist yet. The hashtable is opened outside the mutex; it is installed
 * under the mutex only if no cache has been installed in the meantime,
 * otherwise the freshly opened table is closed again.
 *
 * @param status ICU error code; set by uhash_open() on failure, in which
 *               case no cache is installed.
 */
static void
initCache(UErrorCode *status) {
    UBool cacheMissing = FALSE;

    umtx_lock(&usprepMutex);
    cacheMissing = (SHARED_DATA_HASHTABLE == NULL);
    umtx_unlock(&usprepMutex);

    if(!cacheMissing) {
        return;
    }

    UHashtable *candidate = uhash_open(hashEntry, compareEntries, status);
    if (U_FAILURE(*status)) {
        return;
    }

    umtx_lock(&usprepMutex);
    if(SHARED_DATA_HASHTABLE == NULL) {
        /* still no cache: hand ownership of the new table to the global */
        SHARED_DATA_HASHTABLE = candidate;
        candidate = NULL;
    }
    umtx_unlock(&usprepMutex);

    /* a cache was installed while we were opening ours: discard our table */
    if(candidate != NULL) {
        uhash_close(candidate);
    }
}
/*
 * Opens the StringPrep data item (path, name, type) and attaches it to profile.
 *
 * On success the profile owns the opened data (sprepData), a copy of the
 * index array and of the unserialized trie, and mappingData points at the
 * mapping table that follows the trie in the data.
 *
 * Fails with U_INVALID_FORMAT_ERROR when the data requests normalization but
 * its normalization-corrections version is older than the Unicode version
 * of this library. On every failure no opened data is left attached to the
 * profile by this call.
 *
 * @return TRUE if the data was loaded, FALSE otherwise (errorCode is set)
 */
static UBool U_CALLCONV
loadData(UStringPrepProfile* profile,
         const char* path,
         const char* name,
         const char* type,
         UErrorCode* errorCode) {
    /* load Unicode SPREP data from file */
    UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
    UDataMemory *dataMemory;
    const int32_t *p=NULL;
    const uint8_t *pb;
    UVersionInfo unicodeVersion;
    int32_t normVer, uniVer;

    if(errorCode==NULL || U_FAILURE(*errorCode)) {
        return FALSE;
    }

    /* open the data outside the mutex block */
    //TODO: change the path
    dataMemory=udata_openChoice(path, type, name, isAcceptable, NULL, errorCode);
    if(U_FAILURE(*errorCode)) {
        return FALSE;
    }

    /* the data starts with the index array, immediately followed by the trie */
    p=(const int32_t *)udata_getMemory(dataMemory);
    pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
    utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
    _sprepTrie.getFoldingOffset=getFoldingOffset;

    if(U_FAILURE(*errorCode)) {
        udata_close(dataMemory);
        return FALSE;
    }

    /* in the mutex block, set the data for this process */
    umtx_lock(&usprepMutex);
    if(profile->sprepData==NULL) {
        profile->sprepData=dataMemory;
        dataMemory=NULL;   /* ownership moved to the profile */
        uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
        uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
    } else {
        p=(const int32_t *)udata_getMemory(profile->sprepData);
    }
    umtx_unlock(&usprepMutex);

    /* initialize some variables */
    profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);

    /*
     * check the normalization corrections version and the current Unicode version
     * supported by ICU
     */
    u_versionFromString(unicodeVersion, U_UNICODE_VERSION);
    normVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
    uniVer  = (unicodeVersion[0] << 24) + (unicodeVersion[1] << 16) +
              (unicodeVersion[2] << 8 ) + (unicodeVersion[3]);

    if( normVer < uniVer &&
        ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
      ){
        *errorCode = U_INVALID_FORMAT_ERROR;
        if(dataMemory!=NULL) {
            /* the profile already had data; only our extra copy is released */
            udata_close(dataMemory);
        } else {
            /*
             * this call attached the data to the profile: release it here,
             * otherwise it would stay open after the failure is reported
             */
            udata_close(profile->sprepData);
            profile->sprepData = NULL;
            profile->mappingData = NULL;
        }
        return FALSE;
    }

    profile->isDataLoaded = TRUE;

    /* if a different thread set it first, then close the extra data */
    if(dataMemory!=NULL) {
        udata_close(dataMemory); /* NULL if it was set correctly */
    }

    return profile->isDataLoaded;
}
static UStringPrepProfile*
usprep_getProfile(const char* path,
const char* name,
UErrorCode *status){
UStringPrepProfile* profile = NULL;
initCache(status);
if(U_FAILURE(*status)){
return NULL;
}
UStringPrepKey stackKey;
/*
* const is cast way to save malloc, strcpy and free calls
* we use the passed in pointers for fetching the data from the
* hash table which is safe
*/
stackKey.name = (char*) name;
stackKey.path = (char*) path;
/* fetch the data from the cache */
profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
if(profile == NULL){
UStringPrepKey* key = (UStringPrepKey*) uprv_malloc(sizeof(UStringPrepKey));
if(key == NULL){
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
/* else load the data and put the data in the cache */
profile = (UStringPrepProfile*) uprv_malloc(sizeof(UStringPrepProfile));
if(profile == NULL){
*status = U_MEMORY_ALLOCATION_ERROR;
uprv_free(key);
return NULL;
}
/* initialize the data struct members */
uprv_memset(profile->indexes,0,sizeof(profile->indexes));
profile->mappingData = NULL;
profile->sprepData = NULL;
profile->refCount = 0;
/* initialize the key memebers */
key->name = (char*) uprv_malloc(strlen(name)+1);
if(key->name == NULL){
*status = U_MEMORY_ALLOCATION_ERROR;
uprv_free(key);
uprv_free(profile);
return NULL;
}
uprv_strcpy(key->name, name);
key->path=NULL;
if(path != NULL){
key->path = (char*) uprv_malloc(strlen(path)+1);
if(key->path == NULL){
*status = U_MEMORY_ALLOCATION_ERROR;
uprv_free(key->path);
uprv_free(key);
uprv_free(profile);
return NULL;
}
uprv_strcpy(key->path, path);
}
/* load the data */
if(!loadData(profile, path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
return NULL;
}
umtx_lock(&usprepMutex);
/* add the data object to the cache */
uhash_put(SHARED_DATA_HASHTABLE, key, profile, status);
umtx_unlock(&usprepMutex);
}
umtx_lock(&usprepMutex);
/* increment the refcount */
profile->refCount++;
umtx_unlock(&usprepMutex);
return profile;
}
/*
 * Opens (or fetches from the cache) the StringPrep profile "name" found at
 * "path" and returns it with one more reference. Release with usprep_close().
 *
 * @param path data path, may be NULL
 * @param name profile name; NULL is rejected with U_ILLEGAL_ARGUMENT_ERROR
 * @param status error code; nothing is done if it is NULL or already a failure
 * @return the profile, or NULL on failure
 */
U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char* path,
            const char* name,
            UErrorCode* status){
    if(status == NULL || U_FAILURE(*status)){
        return NULL;
    }
    /* the name is copied with strlen/strcpy further down, so it must be a real string */
    if(name == NULL){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    return usprep_getProfile(path,name,status);
}
/*
 * Gives back one reference to a profile obtained from usprep_open().
 * The profile itself stays in the cache; only the reference count is
 * lowered (never below zero). A NULL profile is ignored.
 */
U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile* profile){
    if(profile!=NULL){
        umtx_lock(&usprepMutex);
        if(profile->refCount > 0){
            --profile->refCount;
        }
        umtx_unlock(&usprepMutex);
    }
}
/* Closes the data item attached to a profile; the profile struct itself is not freed. */
static void
usprep_unload(UStringPrepProfile* data){
    UDataMemory *loaded = data->sprepData;
    udata_close(loaded);
}
/*
 * Removes profiles from the shared cache and frees them together with
 * their keys.
 *
 * @param noRefCount TRUE: remove every entry regardless of its reference
 *                   count; FALSE: remove only entries whose count is 0
 * @return number of removed entries; 0 if the cache was never created
 */
static int32_t
usprep_internal_flushCache(UBool noRefCount){
    int32_t removed = 0;
    int32_t iter = -1;
    const UHashElement *elem;

    umtx_lock(&usprepMutex);
    if (SHARED_DATA_HASHTABLE != NULL) {
        /* walk every element of the table */
        for(elem = uhash_nextElement(SHARED_DATA_HASHTABLE, &iter);
            elem != NULL;
            elem = uhash_nextElement(SHARED_DATA_HASHTABLE, &iter))
        {
            UStringPrepProfile *entry = (UStringPrepProfile *) elem->value.pointer;
            UStringPrepKey *entryKey = (UStringPrepKey *) elem->key.pointer;
            UBool evict = (UBool)(noRefCount == TRUE ||
                                  (noRefCount == FALSE && entry->refCount == 0));
            if(!evict) {
                continue;
            }

            ++removed;
            uhash_removeElement(SHARED_DATA_HASHTABLE, elem);

            /* unload the data */
            usprep_unload(entry);

            /* the key strings were allocated separately */
            if(entryKey->name != NULL) {
                uprv_free(entryKey->name);
                entryKey->name=NULL;
            }
            if(entryKey->path != NULL) {
                uprv_free(entryKey->path);
                entryKey->path=NULL;
            }
            uprv_free(entry);
            uprv_free(entryKey);
        }
    }
    umtx_unlock(&usprepMutex);

    return removed;
}
/*
 * Works just like ucnv_flushCache(): drops the cached profiles that are
 * not referenced any more and returns how many were dropped.
 */
static int32_t
usprep_flushCache(){
    const int32_t flushed = usprep_internal_flushCache(FALSE);
    return flushed;
}
/*
 * Library cleanup hook: empties the profile cache regardless of reference
 * counts, closes the hashtable once it is empty and destroys the mutex.
 *
 * @return TRUE if the cache hashtable no longer exists
 */
U_CFUNC UBool
usprep_cleanup(void){
    if (SHARED_DATA_HASHTABLE != NULL) {
        usprep_internal_flushCache(TRUE);
    }
    if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
        uhash_close(SHARED_DATA_HASHTABLE);
        SHARED_DATA_HASHTABLE = NULL;
    }

    /*
     * Don't worry about destroying the mutex even if the hash table still
     * exists. The mutex will lazily re-init itself if needed.
     */
    umtx_destroy(&usprepMutex);

    return (SHARED_DATA_HASHTABLE == NULL);
}
/*
 * Fills parseError for an error at index pos of rules.
 *
 * offset is set to pos (moved back by one when pos is the end of a
 * non-empty string), line is always 0. preContext receives the code units
 * before pos, postContext the code units after the code point at pos.
 * Both contexts are always NUL-terminated and never hold more than
 * U_PARSE_CONTEXT_LEN-1 code units.
 *
 * @param rules      the text that was being processed
 * @param pos        index of the offending code point
 * @param rulesLen   length of rules
 * @param parseError receives the result; may be NULL, then nothing is done
 */
U_CFUNC void
uprv_syntaxError(const UChar* rules,
                 int32_t pos,
                 int32_t rulesLen,
                 UParseError* parseError){
    if(parseError == NULL){
        return;
    }
    if(pos == rulesLen && rulesLen >0){
        pos--;
    }
    parseError->offset = pos;
    parseError->line = 0 ; // we are not using line numbers

    // for pre-context
    // at most U_PARSE_CONTEXT_LEN-1 units so that the terminator still fits
    int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
    int32_t stop  = pos;

    u_memcpy(parseError->preContext,rules+start,stop-start);
    //null terminate the buffer
    parseError->preContext[stop-start] = 0;

    //for post-context
    start = pos;
    if(start<rulesLen) {
        // skip the code point at pos itself
        U16_FWD_1(rules, start, rulesLen);
    }

    stop = ((pos+U_PARSE_CONTEXT_LEN)<= rulesLen )? (pos+(U_PARSE_CONTEXT_LEN)) :
                                                    rulesLen;
    if(start < stop){
        u_memcpy(parseError->postContext,rules+start,stop-start);
        //null terminate the buffer
        parseError->postContext[stop-start]= 0;
    }else{
        // nothing follows the error position: report an empty context
        parseError->postContext[0] = 0;
    }
}
/*
 * Decodes a 16-bit word read from the profile trie.
 *
 * @param trieWord the trie value
 * @param value    written only for USPREP_MAP (index or delta) and
 *                 USPREP_DELETE (0)
 * @param isIndex  written only for USPREP_MAP (TRUE: value is an index,
 *                 FALSE: value is a delta) and USPREP_DELETE (FALSE)
 * @return the type encoded in the word; USPREP_TYPE_LIMIT for the initial
 *         value 0, which makes callers copy the code point unchanged
 */
static inline UStringPrepType
getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
    /* initial value stored in the trie: nothing known about this code point */
    if(trieWord == 0){
        return USPREP_TYPE_LIMIT;
    }

    /* words at or above the threshold carry the type directly */
    if(trieWord >= _SPREP_TYPE_THRESHOLD){
        return (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
    }

    /* the maximum index value marks a code point that is deleted */
    if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
        isIndex = FALSE;
        value = 0;
        return USPREP_DELETE;
    }

    /* a mapping: bit 1 tells whether the payload is an index or a delta */
    if(trieWord & 0x02){
        isIndex = TRUE;
        value = (int16_t)(trieWord >> 2); // drop the lower 2 bits
    }else{
        int16_t signedWord = (int16_t)trieWord;
        isIndex = FALSE;
        value = (int16_t)(signedWord >> 2);
    }
    return USPREP_MAP;
}
/*
 * Mapping step: reads src code point by code point, looks each one up in
 * the profile trie and writes the result to dest.
 *
 * - an unassigned code point stops processing with
 *   U_STRINGPREP_UNASSIGNED_ERROR (parseError is filled in) unless
 *   USPREP_ALLOW_UNASSIGNED is set in options
 * - a mapped code point is replaced either by a string taken from
 *   profile->mappingData (index form) or by the code point minus a delta
 * - a deleted code point is dropped
 * - every other code point is copied unchanged
 *
 * destIndex keeps counting past destCapacity, so the value returned through
 * u_terminateUChars() is the full length needed (pre-flighting).
 * Arguments are not validated here and srcLength must be the real length;
 * both are the caller's responsibility.
 */
static int32_t
usprep_map( const UStringPrepProfile* profile,
const UChar* src, int32_t srcLength,
UChar* dest, int32_t destCapacity,
int32_t options,
UParseError* parseError,
UErrorCode* status ){
uint16_t result;
int32_t destIndex=0;
int32_t srcIndex;
UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
UStringPrepType type;
int16_t value;
UBool isIndex;
int32_t* indexes = (int32_t*)profile->indexes;
// no error checking here: the caller checks the error code and the arguments
// no string length check here: the caller determines the string length
for(srcIndex=0;srcIndex<srcLength;){
UChar32 ch;
// U16_NEXT advances srcIndex past the code point it reads
U16_NEXT(src,srcIndex,srcLength,ch);
result=0;
UTRIE_GET16(&profile->sprepTrie,ch,result);
// value and isIndex are only meaningful when type is USPREP_MAP
type = getValues(result, value, isIndex);
// check if the source codepoint is unassigned
if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
// srcIndex is already behind ch, so step back to its first code unit
uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
*status = U_STRINGPREP_UNASSIGNED_ERROR;
return 0;
}else if(type == USPREP_MAP){
int32_t index, length;
if(isIndex){
index = value;
// the index range tells the mapping length for 1, 2 and 3 UChars;
// otherwise the length is stored in front of the mapping itself
if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
length = 1;
}else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
length = 2;
}else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
length = 3;
}else{
length = profile->mappingData[index++];
}
/* copy mapping to destination */
for(int32_t i=0; i< length; i++){
if(destIndex < destCapacity ){
dest[destIndex] = profile->mappingData[index+i];
}
destIndex++; /* for pre-flighting */
}
continue;
}else{
// subtract the delta to arrive at the code point
ch -= value;
}
}else if(type==USPREP_DELETE){
// just consume the codepoint and continue
continue;
}
//copy the code point into destination
if(ch <= 0xFFFF){
if(destIndex < destCapacity ){
dest[destIndex] = (UChar)ch;
}
destIndex++;
}else{
// supplementary code point: written as a surrogate pair, only if both units fit
if(destIndex+1 < destCapacity ){
dest[destIndex] = U16_LEAD(ch);
dest[destIndex+1] = U16_TRAIL(ch);
}
destIndex +=2;
}
}
return u_terminateUChars(dest, destCapacity, destIndex, status);
}
/*
 * Normalization step: NFKC with the Unicode 3.2 option.
 * Returns whatever unorm_normalize() returns for these arguments.
 */
static int32_t
usprep_normalize(   const UChar* src, int32_t srcLength,
                    UChar* dest, int32_t destCapacity,
                    UErrorCode* status ){
    const int32_t normalizedLength = unorm_normalize(src, srcLength,
                                                     UNORM_NFKC, UNORM_UNICODE_3_2,
                                                     dest, destCapacity,
                                                     status);
    return normalizedLength;
}
/*
1) Map -- For each character in the input, check if it has a mapping
and, if so, replace it with its mapping.
2) Normalize -- Possibly normalize the result of step 1 using Unicode
normalization.
3) Prohibit -- Check for any characters that are not allowed in the
output. If any are found, return an error.
4) Check bidi -- Possibly check for right-to-left characters, and if
any are found, make sure that the whole string satisfies the
requirements for bidirectional strings. If the string does not
satisfy the requirements for bidirectional strings, return an
error.
[Unicode3.2] defines several bidirectional categories; each character
has one bidirectional category assigned to it. For the purposes of
the requirements below, an "RandALCat character" is a character that
has Unicode bidirectional categories "R" or "AL"; an "LCat character"
is a character that has Unicode bidirectional category "L". Note
that there are many characters which fall in neither of the above
definitions; Latin digits (<U+0030> through <U+0039>) are examples of
this because they have bidirectional category "EN".
In any profile that specifies bidirectional character handling, all
three of the following requirements MUST be met:
1) The characters in section 5.8 MUST be prohibited.
2) If a string contains any RandALCat character, the string MUST NOT
contain any LCat character.
3) If a string contains any RandALCat character, a RandALCat
character MUST be the first character of the string, and a
RandALCat character MUST be the last character of the string.
*/
#define MAX_STACK_BUFFER_SIZE 300
/*
 * Prepares src according to the profile: map, optionally normalize (NFKC),
 * check for prohibited code points and optionally check the bidi rules.
 *
 * @param profile        the profile to apply, must not be NULL
 * @param src            input string, must not be NULL
 * @param srcLength      length of src, or -1 if src is NUL-terminated
 * @param dest           output buffer; may be NULL only if destCapacity is 0
 * @param destCapacity   capacity of dest in UChars
 * @param options        USPREP_ALLOW_UNASSIGNED or 0
 * @param parseError     receives the error position/context for unassigned,
 *                       prohibited and bidi errors; may be NULL
 * @param status         error code; U_STRINGPREP_UNASSIGNED_ERROR,
 *                       U_STRINGPREP_PROHIBITED_ERROR,
 *                       U_STRINGPREP_CHECK_BIDI_ERROR,
 *                       U_MEMORY_ALLOCATION_ERROR or the usual buffer
 *                       overflow/termination codes
 * @return the length of the prepared string. Only meaningful when status
 *         indicates success or a buffer overflow.
 */
U_CAPI int32_t U_EXPORT2
usprep_prepare(   const UStringPrepProfile* profile,
                  const UChar* src, int32_t srcLength,
                  UChar* dest, int32_t destCapacity,
                  int32_t options,
                  UParseError* parseError,
                  UErrorCode* status ){

    // check error status
    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    //check arguments
    if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        *status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
    UChar *b1 = b1Stack, *b2 = b2Stack;
    int32_t b1Len, b2Len=0,
            b1Capacity = MAX_STACK_BUFFER_SIZE ,
            b2Capacity = MAX_STACK_BUFFER_SIZE;
    uint16_t result;
    int32_t b2Index = 0;
    UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    UBool leftToRight=FALSE, rightToLeft=FALSE;
    int32_t rtlPos =-1, ltrPos =-1;
    const int32_t *indexes = profile->indexes;

    // get the options
    UBool doNFKC    = (UBool)((indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
    UBool checkBiDi = (UBool)((indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);

    // per-code-point state of the prohibit/bidi pass
    UChar32 ch;
    UStringPrepType type;
    int16_t value;
    UBool isIndex;

    //get the string length
    if(srcLength == -1){
        srcLength = u_strlen(src);
    }

    // map
    b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);

    if(*status == U_BUFFER_OVERFLOW_ERROR){
        // redo processing of string
        /* we do not have enough room so grow the buffer*/
        b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
        if(b1==NULL){
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }

        *status = U_ZERO_ERROR; // reset error

        b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
    }

    // normalize
    if(doNFKC == TRUE){
        b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
    }else{
        // no normalization: the mapped string is the result so far (b2 aliases b1)
        b2 = b1;
        b2Len = b1Len;
    }

    if(*status == U_BUFFER_OVERFLOW_ERROR){
        // redo processing of string
        /* we do not have enough room so grow the buffer*/
        b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
        if(b2==NULL){
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }

        *status = U_ZERO_ERROR; // reset error

        // normalize the mapped string (b1) again, now into the larger buffer
        b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
    }

    if(U_FAILURE(*status)){
        goto CLEANUP;
    }

    // Prohibit and checkBiDi in one pass
    for(b2Index=0; b2Index<b2Len;){

        ch = 0;

        U16_NEXT(b2, b2Index, b2Len, ch);

        UTRIE_GET16(&profile->sprepTrie,ch,result);

        type = getValues(result, value, isIndex);

        if( type == USPREP_PROHIBITED ||
            ((result < _SPREP_TYPE_THRESHOLD) && (result&0x01))){
            *status = U_STRINGPREP_PROHIBITED_ERROR;
            // the offset refers to b2, so the context must come from b2 as well
            uprv_syntaxError(b2, b2Index-U16_LENGTH(ch), b2Len, parseError);
            goto CLEANUP;
        }

        direction = u_charDirection(ch);
        if(firstCharDir == U_CHAR_DIRECTION_COUNT){
            firstCharDir = direction;
        }
        if(direction == U_LEFT_TO_RIGHT){
            leftToRight = TRUE;
            ltrPos = b2Index-1;
        }
        if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
            rightToLeft = TRUE;
            rtlPos = b2Index-1;
        }
    }

    if(checkBiDi == TRUE){
        // satisfy 2
        if( leftToRight == TRUE && rightToLeft == TRUE){
            *status = U_STRINGPREP_CHECK_BIDI_ERROR;
            uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
            goto CLEANUP;
        }

        //satisfy 3
        // after the loop, direction holds the direction of the last code point
        if( rightToLeft == TRUE &&
            !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
              (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
          ){
            *status = U_STRINGPREP_CHECK_BIDI_ERROR;
            uprv_syntaxError(b2, rtlPos, b2Len, parseError);
            // leave through CLEANUP so that heap buffers are released
            goto CLEANUP;
        }
    }

    if(b2Len <= destCapacity){
        uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
    }

CLEANUP:
    // b2 is released only if it is a heap buffer of its own: it may be the
    // stack buffer, or an alias of b1 when normalization is off
    if(b2!=NULL && b2!=b2Stack && b2!=b1){
        uprv_free(b2);
    }
    if(b1!=NULL && b1!=b1Stack){
        uprv_free(b1);
    }

    return u_terminateUChars(dest, destCapacity, b2Len, status);
}
/*
 * Tells whether ch is a label separator according to the profile data.
 *
 * @param profile the profile to consult; NULL sets U_ILLEGAL_ARGUMENT_ERROR
 * @param ch      the code point to test
 * @param status  error code; nothing is done if it is NULL or already a failure
 * @return TRUE if the trie entry for ch has type USPREP_LABEL_SEPARATOR
 */
U_CFUNC UBool
usprep_isLabelSeparator(UStringPrepProfile* profile,
                        UChar32 ch, UErrorCode* status){
    // check error status
    if(status==NULL || U_FAILURE(*status)){
        return FALSE;
    }
    //check the arguments
    if(profile==NULL){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }

    uint16_t trieWord;
    int16_t mapValue;   // filled by getValues(), not needed here
    UBool mapIsIndex;   // filled by getValues(), not needed here

    UTRIE_GET16(&profile->sprepTrie,ch, trieWord);

    const UStringPrepType kind = getValues(trieWord,mapValue,mapIsIndex);
    return (kind == USPREP_LABEL_SEPARATOR) ? TRUE : FALSE;
}
#endif /* #if !UCONFIG_NO_IDNA */

View file

@ -77,7 +77,7 @@ u_strFromUTF32(UChar *dest,
return NULL;
}
if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
@ -163,7 +163,7 @@ u_strToUTF32(UChar32 *dest,
}
if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
@ -234,7 +234,7 @@ u_strFromUTF8(UChar *dest,
return NULL;
}
if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
@ -332,7 +332,7 @@ u_strToUTF8(char *dest,
return NULL;
}
if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
if((pSrc==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
@ -614,7 +614,7 @@ u_strToWCS(wchar_t *dest,
return NULL;
}
if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
@ -866,7 +866,7 @@ u_strFromWCS(UChar *dest,
return NULL;
}
if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}

View file

@ -34,7 +34,8 @@
U_CAPI UNewTrie * U_EXPORT2
utrie_open(UNewTrie *fillIn,
uint32_t *aliasData, int32_t maxDataLength,
uint32_t initialValue, UBool latin1Linear) {
uint32_t initialValue, uint32_t leadUnitValue,
UBool latin1Linear) {
UNewTrie *trie;
int32_t i, j;
@ -89,6 +90,7 @@ utrie_open(UNewTrie *fillIn,
trie->data[--j]=initialValue;
}
trie->leadUnitValue=leadUnitValue;
trie->indexLength=UTRIE_MAX_INDEX_LENGTH;
trie->dataCapacity=maxDataLength;
trie->isLatin1Linear=latin1Linear;
@ -118,7 +120,9 @@ utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_
isDataAllocated=TRUE;
}
trie=utrie_open(fillIn, aliasData, aliasDataCapacity, other->data[0], other->isLatin1Linear);
trie=utrie_open(fillIn, aliasData, aliasDataCapacity,
other->data[0], other->leadUnitValue,
other->isLatin1Linear);
if(trie==NULL) {
uprv_free(aliasData);
} else {
@ -396,6 +400,22 @@ utrie_fold(UNewTrie *trie, UNewTrieGetFoldedValue *getFoldedValue, UErrorCode *p
}
}
/*
* set all values for lead surrogate code *units* to leadUnitValue
* so that by default runtime lookups will find no data for associated
* supplementary code points, unless there is data for such code points
* which will result in a non-zero folding value below that is set for
* the respective lead units
*
* the above saved the indexes for surrogate code *points* and
* write-protected their data values
*/
if(!utrie_setRange32(trie, 0xd800, 0xdc00, trie->leadUnitValue, TRUE)) {
/* data table overflow */
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
return;
}
/*
* Fold significant index values into the area just after the BMP indexes.
* In case the first lead surrogate has significant data,
@ -418,15 +438,19 @@ utrie_fold(UNewTrie *trie, UNewTrieGetFoldedValue *getFoldedValue, UErrorCode *p
/* is there an identical index block? */
block=_findSameIndexBlock(index, indexLength, c>>UTRIE_SHIFT);
/* get a folded value for [c..c+0x400[ and, if 0, set it for the lead surrogate */
/*
* get a folded value for [c..c+0x400[ and,
* if different from the value for the lead surrogate code point,
* set it for the lead surrogate code unit
*/
value=getFoldedValue(trie, c, block+UTRIE_SURROGATE_BLOCK_COUNT);
if(value!=0) {
if(!utrie_set32(trie, 0xd7c0+(c>>10), value)) {
if(value!=utrie_get32(trie, U16_LEAD(c), NULL)) {
if(!utrie_set32(trie, U16_LEAD(c), value)) {
/* data table overflow */
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
return;
}
utrie_get32(trie, U16_LEAD(c), NULL);
/* if we did not find an identical index block... */
if(block==indexLength) {
/* move the actual index (stage 1) entries from the supplementary position to the new one */
@ -435,6 +459,7 @@ utrie_fold(UNewTrie *trie, UNewTrieGetFoldedValue *getFoldedValue, UErrorCode *p
4*UTRIE_SURROGATE_BLOCK_COUNT);
indexLength+=UTRIE_SURROGATE_BLOCK_COUNT;
}
utrie_get32(trie, U16_LEAD(c), NULL);
}
c+=0x400;
} else {
@ -727,9 +752,11 @@ utrie_serialize(UNewTrie *trie, void *dt, int32_t capacity,
/* fold the supplementary part of the index array */
utrie_fold(trie, getFoldedValue, pErrorCode);
utrie_get32(trie, U16_LEAD(0x10400), NULL);
/* compact again with overlap for minimum data array length */
utrie_compact(trie, TRUE, pErrorCode);
utrie_get32(trie, U16_LEAD(0x10400), NULL);
trie->isCompacted=TRUE;
if(U_FAILURE(*pErrorCode)) {

View file

@ -492,6 +492,7 @@ struct UNewTrie {
int32_t index[UTRIE_MAX_INDEX_LENGTH];
uint32_t *data;
uint32_t leadUnitValue;
int32_t indexLength, dataCapacity, dataLength;
UBool isAllocated, isDataAllocated;
UBool isLatin1Linear, isCompacted;
@ -546,6 +547,8 @@ UNewTrieGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset);
* @param maxDataLength the capacity of aliasData (if not NULL) or
* the length of the data array to be allocated
* @param initialValue the initial value that is set for all code points
* @param leadUnitValue the value for lead surrogate code _units_ that do not
* have associated supplementary data
* @param latin1Linear a flag indicating whether the Latin-1 range is to be allocated and
* kept in a linear, contiguous part of the data array
* @return a pointer to the initialized fillIn or the allocated and initialized new UNewTrie
@ -553,7 +556,8 @@ UNewTrieGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset);
U_CAPI UNewTrie * U_EXPORT2
utrie_open(UNewTrie *fillIn,
uint32_t *aliasData, int32_t maxDataLength,
uint32_t initialValue, UBool latin1Linear);
uint32_t initialValue, uint32_t leadUnitValue,
UBool latin1Linear);
/**
* Clone a build-time trie structure with all entries.

View file

@ -136,7 +136,7 @@ package390: $(BUILDDIR)/icudata390.lst $(BUILDDIR)/icudata.lst ./icupkg.inc
##### Define all the data files. the build rule that depends on them is below.
## DAT files - Misc. data files.
DAT_FILES_SHORT=uprops.icu pnames.icu unames.icu unorm.icu cnvalias.icu tz.icu ucadata.icu invuca.icu uidna.icu
DAT_FILES_SHORT=uprops.icu pnames.icu unames.icu unorm.icu cnvalias.icu tz.icu ucadata.icu invuca.icu uidna.spp
DAT_FILES=$(DAT_FILES_SHORT:%=$(BUILDDIR)/$(ICUDT)%)
## BRK files
@ -231,9 +231,9 @@ $(BUILDDIR)/$(ICUDT)cnvalias.icu: $(UCMSRCDIR)/convrtrs.txt $(TOOLDIR)/gencnval/
$(BUILDDIR)/$(ICUDT)tz.icu: $(MISCSRCDIR)/timezone.txt $(TOOLDIR)/gentz/gentz$(EXEEXT)
ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/gentz/gentz -d $(BUILDDIR) $(MISCSRCDIR)/timezone.txt
# uidna.icu
$(BUILDDIR)/$(ICUDT)uidna.icu: $(MISCSRCDIR)/rfc3454_A_1.txt $(MISCSRCDIR)/rfc3454_B_1.txt $(MISCSRCDIR)/rfc3454_B_2.txt $(MISCSRCDIR)/rfc3454_C_X.txt $(TOOLDIR)/genidna/genidna$(EXEEXT)
ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/genidna/genidna -d $(BUILDDIR) -s $(SRCDATADIR)
# uidna.spp
# Built by gensprep from NamePrepProfile.txt; the source directory passed
# with -s is the same misc directory the prerequisite is taken from.
$(BUILDDIR)/$(ICUDT)uidna.spp: $(MISCSRCDIR)/NamePrepProfile.txt $(TOOLDIR)/gensprep/gensprep$(EXEEXT)
	ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/gensprep/gensprep -d $(BUILDDIR) -s $(MISCSRCDIR) -b uidna -n $(UNICODEDATADIR) -u 3.2.0 -k NamePrepProfile.txt
#################################################### BRK
# BRK FILES
@ -314,7 +314,7 @@ TESTDATA=testdata
TESTDT=$(TESTDATA)_
# File definitions
TEST_DAT_FILES=$(TESTBUILDDIR)/$(TESTDT)test.icu
TEST_DAT_FILES=$(TESTBUILDDIR)/$(TESTDT)test.icu $(TESTBUILDDIR)/$(TESTDT)nfscsi.spp $(TESTBUILDDIR)/$(TESTDT)nfscss.spp $(TESTBUILDDIR)/$(TESTDT)nfscis.spp $(TESTBUILDDIR)/$(TESTDT)nfsmxs.spp $(TESTBUILDDIR)/$(TESTDT)nfsmxp.spp
TEST_UCM_SOURCE= test1.ucm test3.ucm test4.ucm ibm9027.ucm
TEST_UCM_FILES=$(TEST_UCM_SOURCE:%=$(TESTSRCDATADIR)/data/%)
@ -341,6 +341,31 @@ build-testdata: $(ALL_TEST_FILES) $(TESTBUILDDIR)/testdata.lst $(TESTBUILDDIR)/
$(TESTBUILDDIR)/$(TESTDT)test.icu: $(TOOLDIR)/gentest/gentest$(EXEEXT)
ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/gentest/gentest -d $(TESTBUILDDIR)
# Targets for nfscsi.spp
# The target name must match the nfscsi.spp entry in TEST_DAT_FILES.
$(TESTBUILDDIR)/$(TESTDT)nfscsi.spp: $(TOOLDIR)/gensprep/gensprep$(EXEEXT) $(TESTSRCDATADIR)/nfs4_cs_prep_ci.txt
	@echo Building nfscsi.spp
	ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/gensprep/gensprep -s $(TESTSRCDATADIR) -d $(TESTBUILDDIR) -b nfscsi -p $(TESTDATA) -u 3.2.0 nfs4_cs_prep_ci.txt
# Targets for nfscss.spp
# The target name must match the nfscss.spp entry in TEST_DAT_FILES.
$(TESTBUILDDIR)/$(TESTDT)nfscss.spp: $(TOOLDIR)/gensprep/gensprep$(EXEEXT) $(TESTSRCDATADIR)/nfs4_cs_prep_cs.txt
	@echo Building nfscss.spp
	ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/gensprep/gensprep -s $(TESTSRCDATADIR) -d $(TESTBUILDDIR) -b nfscss -p $(TESTDATA) -u 3.2.0 nfs4_cs_prep_cs.txt
# Targets for nfscis.spp
$(TESTBUILDDIR)/$(TESTDT)nfscis.spp: $(TOOLDIR)/gensprep/gensprep$(EXEEXT) $(TESTSRCDATADIR)/nfs4_cis_prep.txt
@echo Building nfscis.spp
ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/gensprep/gensprep -s $(TESTSRCDATADIR) -d $(TESTBUILDDIR) -b nfscis -p $(TESTDATA) -k -n $(UNICODEDATADIR) -u 3.2.0 nfs4_cis_prep.txt
# Targets for nfsmxs.spp
$(TESTBUILDDIR)/$(TESTDT)nfsmxs.spp: $(TOOLDIR)/gensprep/gensprep$(EXEEXT) $(TESTSRCDATADIR)/nfs4_mixed_prep_s.txt
@echo Building nfsmxs.spp
ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/gensprep/gensprep -s $(TESTSRCDATADIR) -d $(TESTBUILDDIR) -b nfsmxs -p $(TESTDATA) -k -n $(UNICODEDATADIR) -u 3.2.0 nfs4_mixed_prep_s.txt
# Targets for nfsmxp.spp
$(TESTBUILDDIR)/$(TESTDT)nfsmxp.spp: $(TOOLDIR)/gensprep/gensprep$(EXEEXT) $(TESTSRCDATADIR)/nfs4_mixed_prep_p.txt
@echo Building nfsmxp.spp
ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/gensprep/gensprep -s $(TESTSRCDATADIR) -d $(TESTBUILDDIR) -b nfsmxp -p $(TESTDATA) -k -n $(UNICODEDATADIR) -u 3.2.0 nfs4_mixed_prep_p.txt
$(TESTBUILDDIR)/$(TESTDT)%.cnv: $(TESTSRCDATADIR)/%.ucm $(TOOLDIR)/makeconv/makeconv$(EXEEXT)
ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/makeconv/makeconv -p $(TESTDATA) -c -d $(TESTBUILDDIR) $(TESTSRCDATADIR)/$(<F)

View file

@ -245,7 +245,7 @@ BRK_FILES = $(ICUDT)sent.brk $(ICUDT)char.brk $(ICUDT)line.brk $(ICUDT)word.brk
# move the .dll and .lib files to their final destination afterwards.
# The $(U_ICUDATA_NAME).lib and $(U_ICUDATA_NAME).exp should already be in the right place due to stubdata.
#
"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" : "$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata.exe" $(CNV_FILES) $(BRK_FILES) "$(ICUBLD)\$(ICUDT)uprops.icu" "$(ICUBLD)\$(ICUDT)unames.icu" "$(ICUBLD)\$(ICUDT)pnames.icu" "$(ICUBLD)\$(ICUDT)unorm.icu" "$(ICUBLD)\$(ICUDT)cnvalias.icu" "$(ICUBLD)\$(ICUDT)tz.icu" "$(ICUBLD)\$(ICUDT)ucadata.icu" "$(ICUBLD)\$(ICUDT)invuca.icu" "$(ICUBLD)\$(ICUDT)uidna.icu" $(ALL_RES) "$(ICUBLD)\$(ICUDT)icudata.res" "$(ICUP)\source\stubdata\stubdatabuilt.txt"
"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" : "$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata.exe" $(CNV_FILES) $(BRK_FILES) "$(ICUBLD)\$(ICUDT)uprops.icu" "$(ICUBLD)\$(ICUDT)unames.icu" "$(ICUBLD)\$(ICUDT)pnames.icu" "$(ICUBLD)\$(ICUDT)unorm.icu" "$(ICUBLD)\$(ICUDT)cnvalias.icu" "$(ICUBLD)\$(ICUDT)tz.icu" "$(ICUBLD)\$(ICUDT)ucadata.icu" "$(ICUBLD)\$(ICUDT)invuca.icu" "$(ICUBLD)\$(ICUDT)uidna.spp" $(ALL_RES) "$(ICUBLD)\$(ICUDT)icudata.res" "$(ICUP)\source\stubdata\stubdatabuilt.txt"
@echo Building icu data
@cd "$(ICUBLD)"
@"$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata" -f -e $(U_ICUDATA_NAME) -v -m dll -c -p $(ICUPKG) -O "$(PKGOPT)" -d "$(ICUBLD)" -s . <<pkgdatain.txt
@ -255,7 +255,7 @@ $(ICUDT)pnames.icu
$(ICUDT)unames.icu
$(ICUDT)ucadata.icu
$(ICUDT)invuca.icu
$(ICUDT)uidna.icu
$(ICUDT)uidna.spp
$(ICUDT)tz.icu
$(ICUDT)cnvalias.icu
$(CNV_FILES:.cnv =.cnv
@ -409,9 +409,9 @@ res_index {
@set ICU_DATA=$(ICUBLD)
@"$(ICUTOOLS)\genuca\$(CFG)\genuca" -s "$(ICUUNIDATA)"
# Targets for uidna.icu
"$(ICUBLD)\$(ICUDT)uidna.icu" : "$(ICUUNIDATA)\*.txt" "$(ICUMISC)\*.txt"
genidna -s "$(ICUDATA)" -d "$(ICUBLD)\\"
# Targets for uidna.spp
"$(ICUBLD)\$(ICUDT)uidna.spp" : "$(ICUUNIDATA)\*.txt" "$(ICUMISC)\NamePrepProfile.txt"
gensprep -s "$(ICUMISC)" -d "$(ICUBLD)\\" -b uidna -n "$(ICUUNIDATA)" -k -u 3.2.0 NamePrepProfile.txt
# Dependencies on the tools for the batch inference rules

File diff suppressed because it is too large Load diff

View file

@ -1,407 +0,0 @@
###################
# Copyright (C) 2003, International Business Machines
# Corporation and others. All Rights Reserved.
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# This file contains code points from Table A.1 from RFC 3454
0221;
0234..024F;
02AE..02AF;
02EF..02FF;
0350..035F;
0370..0373;
0376..0379;
037B..037D;
037F..0383;
038B;
038D;
03A2;
03CF;
03F7..03FF;
0487;
04CF;
04F6..04F7;
04FA..04FF;
0510..0530;
0557..0558;
0560;
0588;
058B..0590;
05A2;
05BA;
05C5..05CF;
05EB..05EF;
05F5..060B;
060D..061A;
061C..061E;
0620;
063B..063F;
0656..065F;
06EE..06EF;
06FF;
070E;
072D..072F;
074B..077F;
07B2..0900;
0904;
093A..093B;
094E..094F;
0955..0957;
0971..0980;
0984;
098D..098E;
0991..0992;
09A9;
09B1;
09B3..09B5;
09BA..09BB;
09BD;
09C5..09C6;
09C9..09CA;
09CE..09D6;
09D8..09DB;
09DE;
09E4..09E5;
09FB..0A01;
0A03..0A04;
0A0B..0A0E;
0A11..0A12;
0A29;
0A31;
0A34;
0A37;
0A3A..0A3B;
0A3D;
0A43..0A46;
0A49..0A4A;
0A4E..0A58;
0A5D;
0A5F..0A65;
0A75..0A80;
0A84;
0A8C;
0A8E;
0A92;
0AA9;
0AB1;
0AB4;
0ABA..0ABB;
0AC6;
0ACA;
0ACE..0ACF;
0AD1..0ADF;
0AE1..0AE5;
0AF0..0B00;
0B04;
0B0D..0B0E;
0B11..0B12;
0B29;
0B31;
0B34..0B35;
0B3A..0B3B;
0B44..0B46;
0B49..0B4A;
0B4E..0B55;
0B58..0B5B;
0B5E;
0B62..0B65;
0B71..0B81;
0B84;
0B8B..0B8D;
0B91;
0B96..0B98;
0B9B;
0B9D;
0BA0..0BA2;
0BA5..0BA7;
0BAB..0BAD;
0BB6;
0BBA..0BBD;
0BC3..0BC5;
0BC9;
0BCE..0BD6;
0BD8..0BE6;
0BF3..0C00;
0C04;
0C0D;
0C11;
0C29;
0C34;
0C3A..0C3D;
0C45;
0C49;
0C4E..0C54;
0C57..0C5F;
0C62..0C65;
0C70..0C81;
0C84;
0C8D;
0C91;
0CA9;
0CB4;
0CBA..0CBD;
0CC5;
0CC9;
0CCE..0CD4;
0CD7..0CDD;
0CDF;
0CE2..0CE5;
0CF0..0D01;
0D04;
0D0D;
0D11;
0D29;
0D3A..0D3D;
0D44..0D45;
0D49;
0D4E..0D56;
0D58..0D5F;
0D62..0D65;
0D70..0D81;
0D84;
0D97..0D99;
0DB2;
0DBC;
0DBE..0DBF;
0DC7..0DC9;
0DCB..0DCE;
0DD5;
0DD7;
0DE0..0DF1;
0DF5..0E00;
0E3B..0E3E;
0E5C..0E80;
0E83;
0E85..0E86;
0E89;
0E8B..0E8C;
0E8E..0E93;
0E98;
0EA0;
0EA4;
0EA6;
0EA8..0EA9;
0EAC;
0EBA;
0EBE..0EBF;
0EC5;
0EC7;
0ECE..0ECF;
0EDA..0EDB;
0EDE..0EFF;
0F48;
0F6B..0F70;
0F8C..0F8F;
0F98;
0FBD;
0FCD..0FCE;
0FD0..0FFF;
1022;
1028;
102B;
1033..1035;
103A..103F;
105A..109F;
10C6..10CF;
10F9..10FA;
10FC..10FF;
115A..115E;
11A3..11A7;
11FA..11FF;
1207;
1247;
1249;
124E..124F;
1257;
1259;
125E..125F;
1287;
1289;
128E..128F;
12AF;
12B1;
12B6..12B7;
12BF;
12C1;
12C6..12C7;
12CF;
12D7;
12EF;
130F;
1311;
1316..1317;
131F;
1347;
135B..1360;
137D..139F;
13F5..1400;
1677..167F;
169D..169F;
16F1..16FF;
170D;
1715..171F;
1737..173F;
1754..175F;
176D;
1771;
1774..177F;
17DD..17DF;
17EA..17FF;
180F;
181A..181F;
1878..187F;
18AA..1DFF;
1E9C..1E9F;
1EFA..1EFF;
1F16..1F17;
1F1E..1F1F;
1F46..1F47;
1F4E..1F4F;
1F58;
1F5A;
1F5C;
1F5E;
1F7E..1F7F;
1FB5;
1FC5;
1FD4..1FD5;
1FDC;
1FF0..1FF1;
1FF5;
1FFF;
2053..2056;
2058..205E;
2064..2069;
2072..2073;
208F..209F;
20B2..20CF;
20EB..20FF;
213B..213C;
214C..2152;
2184..218F;
23CF..23FF;
2427..243F;
244B..245F;
24FF;
2614..2615;
2618;
267E..267F;
268A..2700;
2705;
270A..270B;
2728;
274C;
274E;
2753..2755;
2757;
275F..2760;
2795..2797;
27B0;
27BF..27CF;
27EC..27EF;
2B00..2E7F;
2E9A;
2EF4..2EFF;
2FD6..2FEF;
2FFC..2FFF;
3040;
3097..3098;
3100..3104;
312D..3130;
318F;
31B8..31EF;
321D..321F;
3244..3250;
327C..327E;
32CC..32CF;
32FF;
3377..337A;
33DE..33DF;
33FF;
4DB6..4DFF;
9FA6..9FFF;
A48D..A48F;
A4C7..ABFF;
D7A4..D7FF;
FA2E..FA2F;
FA6B..FAFF;
FB07..FB12;
FB18..FB1C;
FB37;
FB3D;
FB3F;
FB42;
FB45;
FBB2..FBD2;
FD40..FD4F;
FD90..FD91;
FDC8..FDCF;
FDFD..FDFF;
FE10..FE1F;
FE24..FE2F;
FE47..FE48;
FE53;
FE67;
FE6C..FE6F;
FE75;
FEFD..FEFE;
FF00;
FFBF..FFC1;
FFC8..FFC9;
FFD0..FFD1;
FFD8..FFD9;
FFDD..FFDF;
FFE7;
FFEF..FFF8;
10000..102FF;
1031F;
10324..1032F;
1034B..103FF;
10426..10427;
1044E..1CFFF;
1D0F6..1D0FF;
1D127..1D129;
1D1DE..1D3FF;
1D455;
1D49D;
1D4A0..1D4A1;
1D4A3..1D4A4;
1D4A7..1D4A8;
1D4AD;
1D4BA;
1D4BC;
1D4C1;
1D4C4;
1D506;
1D50B..1D50C;
1D515;
1D51D;
1D53A;
1D53F;
1D545;
1D547..1D549;
1D551;
1D6A4..1D6A7;
1D7CA..1D7CD;
1D800..1FFFD;
2A6D7..2F7FF;
2FA1E..2FFFD;
30000..3FFFD;
40000..4FFFD;
50000..5FFFD;
60000..6FFFD;
70000..7FFFD;
80000..8FFFD;
90000..9FFFD;
A0000..AFFFD;
B0000..BFFFD;
C0000..CFFFD;
D0000..DFFFD;
E0000;
E0002..E001F;
E0080..EFFFD;
# Total code points 3653

View file

@ -1,38 +0,0 @@
###################
# Copyright (C) 2003, International Business Machines
# Corporation and others. All Rights Reserved.
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# This file contains code points from Table B.1 from RFC 3454
00AD; ; Map to nothing
034F; ; Map to nothing
1806; ; Map to nothing
180B; ; Map to nothing
180C; ; Map to nothing
180D; ; Map to nothing
200B; ; Map to nothing
200C; ; Map to nothing
200D; ; Map to nothing
2060; ; Map to nothing
FE00; ; Map to nothing
FE01; ; Map to nothing
FE02; ; Map to nothing
FE03; ; Map to nothing
FE04; ; Map to nothing
FE05; ; Map to nothing
FE06; ; Map to nothing
FE07; ; Map to nothing
FE08; ; Map to nothing
FE09; ; Map to nothing
FE0A; ; Map to nothing
FE0B; ; Map to nothing
FE0C; ; Map to nothing
FE0D; ; Map to nothing
FE0E; ; Map to nothing
FE0F; ; Map to nothing
FEFF; ; Map to nothing
# Total code points 27

File diff suppressed because it is too large Load diff

View file

@ -1,182 +0,0 @@
###################
# Copyright (C) 2003, International Business Machines
# Corporation and others. All Rights Reserved.
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.1.1
0020; SPACE
# Total code points 1
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.1.2
00A0; NO-BREAK SPACE
1680; OGHAM SPACE MARK
2000; EN QUAD
2001; EM QUAD
2002; EN SPACE
2003; EM SPACE
2004; THREE-PER-EM SPACE
2005; FOUR-PER-EM SPACE
2006; SIX-PER-EM SPACE
2007; FIGURE SPACE
2008; PUNCTUATION SPACE
2009; THIN SPACE
200A; HAIR SPACE
200B; ZERO WIDTH SPACE
202F; NARROW NO-BREAK SPACE
205F; MEDIUM MATHEMATICAL SPACE
3000; IDEOGRAPHIC SPACE
# Total code points 13
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.2.1
0000..001F; [CONTROL CHARACTERS]
007F; DELETE
# Total code points 18
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.2.2
0080..009F; [CONTROL CHARACTERS]
06DD; ARABIC END OF AYAH
070F; SYRIAC ABBREVIATION MARK
180E; MONGOLIAN VOWEL SEPARATOR
200C; ZERO WIDTH NON-JOINER
200D; ZERO WIDTH JOINER
2028; LINE SEPARATOR
2029; PARAGRAPH SEPARATOR
2060; WORD JOINER
2061; FUNCTION APPLICATION
2062; INVISIBLE TIMES
2063; INVISIBLE SEPARATOR
206A..206F; [CONTROL CHARACTERS]
FEFF; ZERO WIDTH NO-BREAK SPACE
FFF9..FFFC; [CONTROL CHARACTERS]
1D173..1D17A; [MUSICAL CONTROL CHARACTERS]
# Total code points 29
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.3
E000..F8FF; [PRIVATE USE, PLANE 0]
F0000..FFFFD; [PRIVATE USE, PLANE 15]
100000..10FFFD; [PRIVATE USE, PLANE 16]
# Total code points 2051
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.4
FDD0..FDEF; [NONCHARACTER CODE POINTS]
FFFE..FFFF; [NONCHARACTER CODE POINTS]
1FFFE..1FFFF; [NONCHARACTER CODE POINTS]
2FFFE..2FFFF; [NONCHARACTER CODE POINTS]
3FFFE..3FFFF; [NONCHARACTER CODE POINTS]
4FFFE..4FFFF; [NONCHARACTER CODE POINTS]
5FFFE..5FFFF; [NONCHARACTER CODE POINTS]
6FFFE..6FFFF; [NONCHARACTER CODE POINTS]
7FFFE..7FFFF; [NONCHARACTER CODE POINTS]
8FFFE..8FFFF; [NONCHARACTER CODE POINTS]
9FFFE..9FFFF; [NONCHARACTER CODE POINTS]
AFFFE..AFFFF; [NONCHARACTER CODE POINTS]
BFFFE..BFFFF; [NONCHARACTER CODE POINTS]
CFFFE..CFFFF; [NONCHARACTER CODE POINTS]
DFFFE..DFFFF; [NONCHARACTER CODE POINTS]
EFFFE..EFFFF; [NONCHARACTER CODE POINTS]
FFFFE..FFFFF; [NONCHARACTER CODE POINTS]
10FFFE..10FFFF; [NONCHARACTER CODE POINTS]
# Total code points 18
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.5
D800..DFFF; [SURROGATE CODES]
# Total code points 0
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.6
FFF9; INTERLINEAR ANNOTATION ANCHOR
FFFA; INTERLINEAR ANNOTATION SEPARATOR
FFFB; INTERLINEAR ANNOTATION TERMINATOR
FFFC; OBJECT REPLACEMENT CHARACTER
FFFD; REPLACEMENT CHARACTER
# Total code points 5
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.7
2FF0..2FFB; [IDEOGRAPHIC DESCRIPTION CHARACTERS]
# Total code points 1
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.8
0340; COMBINING GRAVE TONE MARK
0341; COMBINING ACUTE TONE MARK
200E; LEFT-TO-RIGHT MARK
200F; RIGHT-TO-LEFT MARK
202A; LEFT-TO-RIGHT EMBEDDING
202B; RIGHT-TO-LEFT EMBEDDING
202C; POP DIRECTIONAL FORMATTING
202D; LEFT-TO-RIGHT OVERRIDE
202E; RIGHT-TO-LEFT OVERRIDE
206A; INHIBIT SYMMETRIC SWAPPING
206B; ACTIVATE SYMMETRIC SWAPPING
206C; INHIBIT ARABIC FORM SHAPING
206D; ACTIVATE ARABIC FORM SHAPING
206E; NATIONAL DIGIT SHAPES
206F; NOMINAL DIGIT SHAPES
# Total code points 9
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
#################
# code points from Table C.9
E0001; LANGUAGE TAG
E0020..E007F; [TAGGING CHARACTERS]
# Total code points 82

View file

@ -1082,7 +1082,7 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st
}
tempUCATable *t = uprv_uca_initTempTable(image, src->opts, src->UCA, NOT_FOUND_TAG, status);
tempUCATable *t = uprv_uca_initTempTable(image, src->opts, src->UCA, NOT_FOUND_TAG, NOT_FOUND_TAG, status);
/* After this, we have assigned CE values to all regular CEs */

View file

@ -108,7 +108,7 @@ static int32_t uprv_uca_addExpansion(ExpansionTable *expansions, uint32_t value,
}
U_CAPI tempUCATable* U_EXPORT2
uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UErrorCode *status) {
uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status) {
tempUCATable *t = (tempUCATable *)uprv_malloc(sizeof(tempUCATable));
/* test for NULL */
if (t == NULL) {
@ -147,7 +147,12 @@ uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollat
}
uprv_memset(t->expansions, 0, sizeof(ExpansionTable));
/*t->mapping = ucmpe32_open(UCOL_SPECIAL_FLAG | (initTag<<24), UCOL_SPECIAL_FLAG | (SURROGATE_TAG<<24), UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG<<24), status);*/
t->mapping = utrie_open(NULL, NULL, 0x100000, UCOL_SPECIAL_FLAG | (initTag<<24), TRUE); // Do your own mallocs for the structure, array and have linear Latin 1
/*t->mapping = utrie_open(NULL, NULL, 0x100000, UCOL_SPECIAL_FLAG | (initTag<<24), TRUE); // Do your own mallocs for the structure, array and have linear Latin 1*/
t->mapping = utrie_open(NULL, NULL, 0x100000,
UCOL_SPECIAL_FLAG | (initTag<<24),
UCOL_SPECIAL_FLAG | (supplementaryInitTag << 24),
TRUE); // Do your own mallocs for the structure, array and have linear Latin 1
t->prefixLookup = uhash_open(prefixLookupHash, prefixLookupComp, status);
uhash_setValueDeleter(t->prefixLookup, uhash_freeBlock);
@ -1357,11 +1362,13 @@ uprv_uca_assembleTable(tempUCATable *t, UErrorCode *status) {
// This is debug code to dump the contents of the trie. It needs two functions defined above
{
UTrie UCAt = { 0 };
uint32_t trieWord;
utrie_unserialize(&UCAt, dataStart+tableOffset, 9999999, status);
UCAt.getFoldingOffset = myGetFoldingOffset;
if(U_SUCCESS(*status)) {
utrie_enum(&UCAt, NULL, enumRange, NULL);
}
trieWord = UTRIE_GET32_FROM_LEAD(UCAt, 0xDC01)
}
#endif
tableOffset += paddedsize(mappingSize);

View file

@ -91,7 +91,7 @@ typedef struct {
UHashtable *prefixLookup;
} tempUCATable;
U_CAPI tempUCATable * U_EXPORT2 uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UErrorCode *status);
U_CAPI tempUCATable * U_EXPORT2 uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status);
U_CAPI tempUCATable * U_EXPORT2 uprv_uca_cloneTempTable(tempUCATable *t, UErrorCode *status);
U_CAPI void U_EXPORT2 uprv_uca_closeTempTable(tempUCATable *t);
U_CAPI uint32_t U_EXPORT2 uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status);

View file

@ -33,6 +33,8 @@ void addCompactArrayTest(TestNode** root);
void addTestDeprecatedAPI(TestNode** root);
void addUCharTransformTest(TestNode** root);
void addUSetTest(TestNode** root);
void addUStringPrepTest(TestNode** root);
void addIDNATest(TestNode** root);
void addAllTests(TestNode** root)
{
@ -61,5 +63,9 @@ void addAllTests(TestNode** root)
#endif
addUSetTest(root);
addTestDeprecatedAPI(root);
#if !UCONFIG_NO_IDNA
addUStringPrepTest(root);
addIDNATest(root);
#endif
}

View file

@ -605,5 +605,29 @@ SOURCE=.\utf8tst.c
SOURCE=.\utransts.c
# End Source File
# End Group
# Begin Group "sprep & idna"
# PROP Default_Filter ""
# Begin Source File
SOURCE=.\idnatest.c
# End Source File
# Begin Source File
SOURCE=.\nfsprep.c
# End Source File
# Begin Source File
SOURCE=.\nfsprep.h
# End Source File
# Begin Source File
SOURCE=.\spreptst.c
# End Source File
# Begin Source File
SOURCE=.\sprpdata.c
# End Source File
# End Group
# End Target
# End Project

View file

@ -1453,6 +1453,7 @@ static void TestComposeDecompose(void) {
UChar32 u = 0;
UChar comp[NORM_BUFFER_TEST_LEN];
uint32_t len = 0;
UCollationElements *iter;
noOfLoc = uloc_countAvailable();
@ -1514,6 +1515,7 @@ static void TestComposeDecompose(void) {
ucol_close(coll);
log_verbose("Testing locales, number of cases = %i\n", noCases);
iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
for(i = 0; i<noOfLoc; i++) {
status = U_ZERO_ERROR;
locName = uloc_getAvailable(i);
@ -1535,6 +1537,12 @@ static void TestComposeDecompose(void) {
if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
log_verbose("Testing NFC\n");
ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
backAndForth(iter);
log_verbose("Testing NFD\n");
ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
backAndForth(iter);
}
}
ucol_close(coll);
@ -1544,6 +1552,7 @@ static void TestComposeDecompose(void) {
free(t[u]);
}
free(t);
ucol_closeElements(iter);
}
static void TestEmptyRule(void) {
@ -3374,56 +3383,69 @@ static void TestRuleOptions(void) {
} tests[] = {
/* - all befores here amount to zero */
{ "&[before 1][first tertiary ignorable]<<<a",
{ "\\u0000", "a"}, 2}, /* you cannot go before first tertiary ignorable */
{ "\\u0000", "a"}, 2
}, /* you cannot go before first tertiary ignorable */
{ "&[before 1][last tertiary ignorable]<<<a",
{ "\\u0000", "a"}, 2}, /* you cannot go before last tertiary ignorable */
{ "\\u0000", "a"}, 2
}, /* you cannot go before last tertiary ignorable */
{ "&[before 1][first secondary ignorable]<<<a",
{ "\\u0000", "a"}, 2}, /* you cannot go before first secondary ignorable */
{ "\\u0000", "a"}, 2
}, /* you cannot go before first secondary ignorable */
{ "&[before 1][last secondary ignorable]<<<a",
{ "\\u0000", "a"}, 2}, /* you cannot go before first secondary ignorable */
{ "\\u0000", "a"}, 2
}, /* you cannot go before first secondary ignorable */
/* 'normal' befores */
{ "&[before 1][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
{ "c", "b", "\\u0332", "a" }, 4},
{ "c", "b", "\\u0332", "a" }, 4
},
/* we don't have a code point that corresponds to
* the last primary ignorable
*/
{ "&[before 2][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
{ "\\u0332", "\\u20e3", "c", "b", "a" }, 5},
{ "\\u0332", "\\u20e3", "c", "b", "a" }, 5
},
{ "&[before 1][first variable]<<<c<<<b &[first variable]<a",
{ "c", "b", "\\u0009", "a", "\\u000a" }, 5},
{ "c", "b", "\\u0009", "a", "\\u000a" }, 5
},
{ "&[last variable]<a &[before 1][last variable]<<<c<<<b ",
{ "c", "b", "\\uD800\\uDF23", "a", "\\u02d0" }, 5},
{ "c", "b", "\\uD800\\uDF23", "a", "\\u02d0" }, 5
},
{ "&[first regular]<a"
"&[before 1][first regular]<b",
{ "b", "\\u02d0", "a", "\\u02d1"}, 4},
{ "b", "\\u02d0", "a", "\\u02d1"}, 4
},
{ "&[before 1][last regular]<b"
"&[last regular]<a",
{ "b", "\\uD801\\uDC25", "a", "\\u4e00" }, 4},
{ "b", "\\uD801\\uDC25", "a", "\\u4e00" }, 4
},
{ "&[before 1][first implicit]<b"
"&[first implicit]<a",
{ "b", "\\u4e00", "a", "\\u4e01"}, 4},
{ "b", "\\u4e00", "a", "\\u4e01"}, 4
},
{ "&[before 1][last implicit]<b"
"&[last implicit]<a",
{ "b", "\\U0010FFFC", "a" }, 3},
{ "b", "\\U0010FFFC", "a" }, 3
},
{ "&[last variable]<z"
"&[last primary ignorable]<x"
"&[last secondary ignorable]<<y"
"&[last tertiary ignorable]<<<w"
"&[top]<u",
{"\\ufffb", "w", "y", "\\u20e3", "x", "\\u137c", "z", "u"}, 7 }
{"\\ufffb", "w", "y", "\\u20e3", "x", "\\u137c", "z", "u"}, 7
}
};
uint32_t i;

View file

@ -537,12 +537,14 @@ static void TestNewTypes() {
UChar* expectedEscaped = (UChar*)malloc(U_SIZEOF_UCHAR * patternLen);
const UChar* got = ures_getStringByKey(theBundle,"test_unescaping",&len,&status);
int32_t expectedLen = u_unescape(pattern,expectedEscaped,patternLen);
if(u_strncmp(expectedEscaped,got,expectedLen)!=0 || expectedLen != len){
if(got==NULL || u_strncmp(expectedEscaped,got,expectedLen)!=0 || expectedLen != len){
log_err("genrb failed to unescape string\n");
}
for(i=0;i<expectedLen;i++){
if(expectedEscaped[i] != got[i]){
log_verbose("Expected: 0x%04X Got: 0x%04X \n",expectedEscaped[i], got[i]);
if(got != NULL){
for(i=0;i<expectedLen;i++){
if(expectedEscaped[i] != got[i]){
log_verbose("Expected: 0x%04X Got: 0x%04X \n",expectedEscaped[i], got[i]);
}
}
}
free(expectedEscaped);

View file

@ -0,0 +1,642 @@
/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: idnatest.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003jul11
* created by: Ram Viswanadha
*/
#include <stdlib.h>
#include <string.h>
/*
 * utypes.h has to be included before UCONFIG_NO_IDNA is tested: the ICU
 * headers are what define that build switch. Tested any earlier, the macro
 * is still undefined (unless it was set on the compiler command line), an
 * undefined macro evaluates to 0 inside #if, and the guard below would
 * never exclude this file.
 */
#include "unicode/utypes.h"

#if !UCONFIG_NO_IDNA

#include "unicode/ustring.h"
#include "unicode/uidna.h"
#include "cintltst.h"

/* Number of elements of a true array (not of a pointer). */
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
/* Capacity, in UChars, of the scratch buffers used throughout the tests. */
#define MAX_DEST_SIZE 1000
/* Test cases of this file; addIDNATest() registers each of them. */
static void TestToUnicode(void);
static void TestToASCII(void);
static void TestIDNToUnicode(void);
static void TestIDNToASCII(void);
static void TestCompare(void);
/* Registration hook, called from outside this file. */
void addIDNATest(TestNode** root);
/*
 * Common signature of the conversion functions exercised by testAPI().
 * In this file it is bound to uidna_toASCII, uidna_toUnicode,
 * uidna_IDNToASCII and uidna_IDNToUnicode.
 * A srcLength of -1 is used by the callers for NUL-terminated input; the
 * return value is treated as the number of UChars written or required.
 */
typedef int32_t
(*TestFunc) ( const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity,
int32_t options, UParseError *parseError,
UErrorCode *status);
/*
 * Signature of the comparison function exercised by testCompareWithSrc();
 * bound to uidna_compare in TestCompare(). Callers treat a return value of
 * 0 as "the two names are equal".
 */
typedef int32_t
(*CompareFunc) (const UChar *s1, int32_t s1Len,
const UChar *s2, int32_t s2Len,
int32_t options,
UErrorCode *status);
/*
 * Hooks every IDNA test case of this file into the test tree, each under
 * its own "idna/..." path.
 */
void
addIDNATest(TestNode** root)
{
    addTest(root, TestToUnicode,    "idna/TestToUnicode");
    addTest(root, TestToASCII,      "idna/TestToASCII");
    addTest(root, TestIDNToUnicode, "idna/TestIDNToUnicode");
    addTest(root, TestIDNToASCII,   "idna/TestIDNToASCII");
    addTest(root, TestCompare,      "idna/TestCompare");
}
/*
 * Drives one conversion function through its preflight/convert protocol and
 * reports every deviation from the expectation through log_err().
 *
 *   src               NUL-terminated input. A heap copy without the
 *                     terminator (tSrc/tSrcLen) is made for the calls that
 *                     pass an explicit length.
 *   expected          NUL-terminated expected output; NULL is treated as
 *                     length 0.
 *   testName          label inserted into the log messages.
 *   useSTD3ASCIIRules TRUE selects UIDNA_USE_STD3_RULES, otherwise
 *                     UIDNA_DEFAULT is used as the base option set.
 *   expectedStatus    error code each variant is expected to finish with.
 *   doCompare         TRUE compares the output with expected using
 *                     u_strCaseCompare().
 *   testUnassigned    TRUE repeats each variant with UIDNA_ALLOW_UNASSIGNED
 *                     or-ed into the options.
 *   func              function under test.
 *
 * Every variant first calls func with a NULL destination of capacity 0. If
 * that reports U_BUFFER_OVERFLOW_ERROR, the status is reset and func is
 * called again into destStack with capacity destLen+1.
 *
 * When expectedStatus is U_IDNA_STD3_ASCII_RULES_ERROR only the else-branch
 * at the bottom runs, with UIDNA_USE_STD3_RULES forced on.
 */
static void
testAPI(const UChar* src, const UChar* expected, const char* testName,
UBool useSTD3ASCIIRules,UErrorCode expectedStatus,
UBool doCompare, UBool testUnassigned, TestFunc func){
UErrorCode status = U_ZERO_ERROR;
UChar destStack[MAX_DEST_SIZE];
int32_t destLen = 0;
UChar* dest = NULL;
int32_t expectedLen = (expected != NULL) ? u_strlen(expected) : 0;
int32_t options = (useSTD3ASCIIRules == TRUE) ? UIDNA_USE_STD3_RULES : UIDNA_DEFAULT;
UParseError parseError;
int32_t tSrcLen = 0;
UChar* tSrc = NULL;
/* Build the length-delimited copy of src; the terminator is deliberately not copied. */
/* NOTE(review): the malloc() result is not checked before memcpy(). */
if(src != NULL){
tSrcLen = u_strlen(src);
tSrc =(UChar*) malloc( U_SIZEOF_UCHAR * tSrcLen );
memcpy(tSrc,src,tSrcLen * U_SIZEOF_UCHAR);
}
/* test null-terminated source and return value of number of UChars required */
if( expectedStatus != U_IDNA_STD3_ASCII_RULES_ERROR ){
/* Variant 1: NUL-terminated source, base options. */
destLen = func(src,-1,NULL,0,options, &parseError , &status);
if(status == U_BUFFER_OVERFLOW_ERROR){
status = U_ZERO_ERROR; /* reset error code */
/* NOTE(review): the limit checked here is MAX_DEST_SIZE, while the
 * failure messages in this function speak of "destCapacity > 300". */
if(destLen+1 < MAX_DEST_SIZE){
dest = destStack;
destLen = func(src,-1,dest,destLen+1,options, &parseError, &status);
/* TODO : compare output with expected */
/* The expectedStatus test below is always true inside this branch. */
if(U_SUCCESS(status) && expectedStatus != U_IDNA_STD3_ASCII_RULES_ERROR&& (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){
log_err("Did not get the expected result for null terminated source.\n" );
}
}else{
log_err( "%s null terminated source failed. Requires destCapacity > 300\n",testName);
}
}
/* Only this first variant aborts the whole test on an unexpected status;
 * the later variants log the mismatch and carry on. */
if(status != expectedStatus){
log_err( "Did not get the expected error for %s null terminated source failed. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status));
free(tSrc);
return;
}
/* Variant 2: NUL-terminated source, unassigned code points allowed. */
if(testUnassigned ){
status = U_ZERO_ERROR;
destLen = func(src,-1,NULL,0,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status);
if(status == U_BUFFER_OVERFLOW_ERROR){
status = U_ZERO_ERROR; /* reset error code */
if(destLen+1 < MAX_DEST_SIZE){
dest = destStack;
destLen = func(src,-1,dest,destLen+1,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status);
/* TODO : compare output with expected */
if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){
log_err("Did not get the expected result for %s null terminated source with both options set.\n",testName);
}
}else{
log_err( "%s null terminated source failed. Requires destCapacity > 300\n",testName);
}
}
/*testing query string*/
/* A status mismatch is tolerated when the expectation was U_IDNA_UNASSIGNED_ERROR. */
if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_ERROR){
log_err( "Did not get the expected error for %s null terminated source with options set. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status));
}
}
status = U_ZERO_ERROR;
/* test source with length and return value of number of UChars required */
/* Variant 3: explicit source length, base options. */
destLen = func(tSrc, tSrcLen, NULL,0,options, &parseError, &status);
if(status == U_BUFFER_OVERFLOW_ERROR){
status = U_ZERO_ERROR; /* reset error code */
if(destLen+1 < MAX_DEST_SIZE){
dest = destStack;
/* NOTE(review): the preflight above used the unterminated copy
 * (tSrc, tSrcLen) but the conversion passes src with u_strlen(src);
 * u_strlen(src) would also be reached with a NULL src. */
destLen = func(src,u_strlen(src),dest,destLen+1,options, &parseError, &status);
/* TODO : compare output with expected */
if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){
log_err("Did not get the expected result for %s with source length.\n",testName);
}
}else{
log_err( "%s with source length failed. Requires destCapacity > 300\n",testName);
}
}
if(status != expectedStatus){
log_err( "Did not get the expected error for %s with source length. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status));
}
/* Variant 4: explicit source length, unassigned code points allowed. */
if(testUnassigned){
status = U_ZERO_ERROR;
destLen = func(tSrc,tSrcLen,NULL,0,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status);
if(status == U_BUFFER_OVERFLOW_ERROR){
status = U_ZERO_ERROR; /* reset error code */
if(destLen+1 < MAX_DEST_SIZE){
dest = destStack;
destLen = func(src,u_strlen(src),dest,destLen+1,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status);
/* TODO : compare output with expected */
if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){
log_err("Did not get the expected result for %s with source length and both options set.\n",testName);
}
}else{
log_err( "%s with source length failed. Requires destCapacity > 300\n",testName);
}
}
/*testing query string*/
if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_ERROR){
log_err( "Did not get the expected error for %s with source length and options set. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status));
}
}
}else{
/* expectedStatus is U_IDNA_STD3_ASCII_RULES_ERROR: run both source forms
 * with UIDNA_USE_STD3_RULES forced on. */
/* Variant 5: NUL-terminated source, STD3 rules. */
status = U_ZERO_ERROR;
destLen = func(src,-1,NULL,0,options | UIDNA_USE_STD3_RULES, &parseError, &status);
if(status == U_BUFFER_OVERFLOW_ERROR){
status = U_ZERO_ERROR; /* reset error code*/
if(destLen+1 < MAX_DEST_SIZE){
dest = destStack;
destLen = func(src,-1,dest,destLen+1,options | UIDNA_USE_STD3_RULES, &parseError, &status);
/* TODO : compare output with expected*/
if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){
log_err("Did not get the expected result for %s null terminated source with both options set.\n",testName);
}
}else{
log_err( "%s null terminated source failed. Requires destCapacity > 300\n",testName);
}
}
/*testing query string*/
if(status != expectedStatus){
log_err( "Did not get the expected error for %s null terminated source with options set. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status));
}
/* Variant 6: explicit source length, STD3 rules. */
status = U_ZERO_ERROR;
destLen = func(tSrc,tSrcLen,NULL,0,options | UIDNA_USE_STD3_RULES, &parseError, &status);
if(status == U_BUFFER_OVERFLOW_ERROR){
status = U_ZERO_ERROR; /* reset error code*/
if(destLen+1 < MAX_DEST_SIZE){
dest = destStack;
destLen = func(src,u_strlen(src),dest,destLen+1,options | UIDNA_USE_STD3_RULES, &parseError, &status);
/* TODO : compare output with expected*/
if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){
log_err("Did not get the expected result for %s with source length and both options set.\n",testName);
}
}else{
log_err( "%s with source length failed. Requires destCapacity > 300\n",testName);
}
}
/*testing query string*/
/* The second clause is always true here, since expectedStatus is the STD3 error. */
if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_ERROR){
log_err( "Did not get the expected error for %s with source length and options set. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status));
}
}
free(tSrc);
}
/*
 * Unicode sample labels, one UTF-16 string per row.
 *
 * Row i pairs with asciiIn[i]: the tests walk both tables with the same
 * index, so the two must keep the same number and order of live entries.
 *
 * Rows are 41 UChars wide; the longest entry (40 code units plus its
 * terminator) fills a row exactly. Rows whose initializer omits the
 * trailing 0x0000 (the last three) are still terminated, because the
 * remaining elements of a partially initialized row are zero.
 *
 * The letters in the per-row comments name the RFC 3492 section 7.1 sample
 * strings these rows appear to reproduce (TODO confirm against the RFC).
 */
static UChar unicodeIn[][41] ={
/* (A) Arabic */
{
0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643, 0x0644,
0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A, 0x061F, 0x0000
},
/* (B) Chinese, simplified */
{
0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587,
0x0000
},
/* (D) Czech */
{
0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073, 0x0074,
0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076, 0x00ED, 0x010D,
0x0065, 0x0073, 0x006B, 0x0079, 0x0000
},
/* (E) Hebrew */
{
0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5, 0x05D8,
0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9, 0x05DD, 0x05E2,
0x05D1, 0x05E8, 0x05D9, 0x05EA, 0x0000
},
/* (F) Hindi (Devanagari) */
{
0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928, 0x094D,
0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902, 0x0928, 0x0939,
0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938, 0x0915, 0x0924, 0x0947,
0x0939, 0x0948, 0x0902, 0x0000
},
/* (G) Japanese (kanji and hiragana) */
{
0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E, 0x3092,
0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044, 0x306E, 0x304B,
0x0000
},
/* (H) Korean (Hangul) - disabled together with its asciiIn counterpart */
/*
{
0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C, 0x0000
},
*/
/* (I) Russian (Cyrillic) */
{
0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435, 0x043E,
0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432, 0x043E, 0x0440,
0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443, 0x0441, 0x0441, 0x043A,
0x0438, 0x0000
},
/* (J) Spanish - the entry that determines the row width of 41 */
{
0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F, 0x0070,
0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069, 0x006D, 0x0070,
0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074, 0x0065, 0x0068, 0x0061,
0x0062, 0x006C, 0x0061, 0x0072, 0x0065, 0x006E, 0x0045, 0x0073, 0x0070,
0x0061, 0x00F1, 0x006F, 0x006C, 0x0000
},
/* (C) Chinese, traditional */
{
0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587,
0x0000
},
/* (K) Vietnamese */
{
0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD, 0x006B,
0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3, 0x0063, 0x0068,
0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069, 0x1EBF, 0x006E, 0x0067,
0x0056, 0x0069, 0x1EC7, 0x0074, 0x0000
},
/* (L)-(R) mixed ASCII / Japanese labels */
{
0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F, 0x0000
},
{
0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069, 0x0074,
0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052, 0x002D, 0x004D,
0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053, 0x0000
},
{
0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E, 0x006F,
0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061, 0x0079, 0x002D,
0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834, 0x6240, 0x0000
},
{
0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032, 0x0000
},
{
0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069, 0x3059,
0x308B, 0x0035, 0x79D2, 0x524D, 0x0000
},
{
0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0,
0x0000
},
{
0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067, 0x0000
},
/* test non-BMP code points */
/* (each code point is written as a UTF-16 surrogate pair, lead unit 0xD800) */
{
0xD800, 0xDF00, 0xD800, 0xDF01, 0xD800, 0xDF02, 0xD800, 0xDF03, 0xD800, 0xDF05,
0xD800, 0xDF06, 0xD800, 0xDF07, 0xD800, 0xDF09, 0xD800, 0xDF0A, 0xD800, 0xDF0B,
0x0000
},
{
0xD800, 0xDF0D, 0xD800, 0xDF0C, 0xD800, 0xDF1E, 0xD800, 0xDF0F, 0xD800, 0xDF16,
0xD800, 0xDF15, 0xD800, 0xDF14, 0xD800, 0xDF12, 0xD800, 0xDF10, 0xD800, 0xDF20,
0xD800, 0xDF21,
0x0000
},
/* Greek */
/* (no explicit terminator from here on; the zero fill of the row provides it) */
{
0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac
},
/* Maltese */
{
0x0062, 0x006f, 0x006e, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
0x0127, 0x0061
},
/* Russian */
{
0x043f, 0x043e, 0x0447, 0x0435, 0x043c, 0x0443, 0x0436, 0x0435,
0x043e, 0x043d, 0x0438, 0x043d, 0x0435, 0x0433, 0x043e, 0x0432,
0x043e, 0x0440, 0x044f, 0x0442, 0x043f, 0x043e, 0x0440, 0x0443,
0x0441, 0x0441, 0x043a, 0x0438
}
};
static const char *asciiIn[] = {
"xn--egbpdaj6bu4bxfgehfvwxn",
"xn--ihqwcrb4cv8a8dqg056pqjye",
"xn--Proprostnemluvesky-uyb24dma41a",
"xn--4dbcagdahymbxekheh6e0a7fei0b",
"xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd",
"xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa",
/* "xn--989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c",*/
"xn--b1abfaaepdrnnbgefbaDotcwatmq2g4l",
"xn--PorqunopuedensimplementehablarenEspaol-fmd56a",
"xn--ihqwctvzc91f659drss3x8bo0yb",
"xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g",
"xn--3B-ww4c5e180e575a65lsy2b",
"xn---with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n",
"xn--Hello-Another-Way--fc4qua05auwb3674vfr0b",
"xn--2-u9tlzr9756bt3uc0v",
"xn--MajiKoi5-783gue6qz075azm5e",
"xn--de-jg4avhby1noc0d",
"xn--d9juau41awczczp",
"XN--097CCDEKGHQJK",
"XN--db8CBHEJLGH4E0AL",
"xn--hxargifdar", /* Greek */
"xn--bonusaa-5bb1da", /* Maltese */
"xn--b1abfaaepdrnnbgefbadotcwatmq2g4l", /* Russian (Cyrillic)*/
};
/*
 * Whole domain names fed to TestIDNToUnicode() and TestIDNToASCII().
 *
 * Each entry is run through u_unescape() before use, so the "\\uXXXX"
 * sequences (a doubled backslash in the C source, i.e. a literal backslash
 * in the string) are turned into single UChars at run time.
 */
static const char *domainNames[] = {
"slip129-37-118-146.nc.us.ibm.net",
"saratoga.pe.utexas.edu",
"dial-120-45.ots.utexas.edu",
"woo-085.dorms.waller.net",
"hd30-049.hil.compuserve.com",
"pem203-31.pe.ttu.edu",
"56K-227.MaxTNT3.pdq.net",
"dial-36-2.ots.utexas.edu",
"slip129-37-23-152.ga.us.ibm.net",
"ts45ip119.cadvision.com",
"sdn-ts-004txaustP05.dialsprint.net",
"bar-tnt1s66.erols.com",
"101.st-louis-15.mo.dial-access.att.net",
"h92-245.Arco.COM",
"dial-13-2.ots.utexas.edu",
"net-redynet29.datamarkets.com.ar",
"ccs-shiva28.reacciun.net.ve",
"7.houston-11.tx.dial-access.att.net",
"ingw129-37-120-26.mo.us.ibm.net",
"dialup6.austintx.com",
"dns2.tpao.gov.tr",
"slip129-37-119-194.nc.us.ibm.net",
"cs7.dillons.co.uk.203.119.193.in-addr.arpa",
"swprd1.innovplace.saskatoon.sk.ca",
"bikini.bologna.maraut.it",
"node91.subnet159-198-79.baxter.com",
"cust19.max5.new-york.ny.ms.uu.net",
"balexander.slip.andrew.cmu.edu",
"pool029.max2.denver.co.dynip.alter.net",
"cust49.max9.new-york.ny.ms.uu.net",
"s61.abq-dialin2.hollyberry.com",
/* names containing escaped non-ASCII or punctuation characters */
"http://\\u0917\\u0928\\u0947\\u0936.sanjose.ibm.com",
"www.xn--vea.com",
"www.\\u00E0\\u00B3\\u00AF.com",
"www.\\u00C2\\u00A4.com",
"www.\\u00C2\\u00A3.com",
"\\u0025",
"\\u005C\\u005C",
"@",
"\\u002F",
"www.\\u0021.com",
"www.\\u0024.com",
"\\u003f",
/* These yield U_IDNA_PROHIBITED_ERROR
//"\\u00CF\\u0082.com",
//"\\u00CE\\u00B2\\u00C3\\u009Fss.com",
//"\\u00E2\\u0098\\u00BA.com",*/
"\\u00C3\\u00BC.com",
};
static void
TestToASCII(){
int32_t i;
UChar buf[MAX_DEST_SIZE];
const char* testName = "uidna_toASCII";
TestFunc func = uidna_toASCII;
for(i=0;i< (int32_t)(sizeof(unicodeIn)/sizeof(unicodeIn[0])); i++){
u_charsToUChars(asciiIn[i],buf, strlen(asciiIn[i])+1);
testAPI(unicodeIn[i], buf,testName, FALSE,U_ZERO_ERROR, TRUE, TRUE, func);
}
}
static void
TestToUnicode(){
int32_t i;
UChar buf[MAX_DEST_SIZE];
const char* testName = "uidna_toUnicode";
TestFunc func = uidna_toUnicode;
for(i=0;i< (int32_t)(sizeof(asciiIn)/sizeof(asciiIn[0])); i++){
u_charsToUChars(asciiIn[i],buf, strlen(asciiIn[i])+1);
testAPI(buf,unicodeIn[i],testName,FALSE,U_ZERO_ERROR, TRUE, TRUE, func);
}
}
static void
TestIDNToUnicode(){
int32_t i;
UChar buf[MAX_DEST_SIZE];
UChar expected[MAX_DEST_SIZE];
UErrorCode status = U_ZERO_ERROR;
int32_t bufLen = 0;
UParseError parseError;
const char* testName="uidna_IDNToUnicode";
TestFunc func = uidna_IDNToUnicode;
for(i=0;i< (int32_t)(sizeof(domainNames)/sizeof(domainNames[0])); i++){
bufLen = strlen(domainNames[i]);
bufLen = u_unescape(domainNames[i],buf, bufLen+1);
func(buf,bufLen,expected,MAX_DEST_SIZE, UIDNA_ALLOW_UNASSIGNED, &parseError,&status);
if(U_FAILURE(status)){
log_err( "%s failed to convert domainNames[%i].Error: %s \n",testName, i, u_errorName(status));
break;
}
testAPI(buf,expected,testName,FALSE,U_ZERO_ERROR, TRUE, TRUE, func);
/*test toUnicode with all labels in the string*/
testAPI(buf,expected,testName, FALSE,U_ZERO_ERROR, TRUE, TRUE, func);
if(U_FAILURE(status)){
log_err( "%s failed to convert domainNames[%i].Error: %s \n",testName,i, u_errorName(status));
break;
}
}
}
static void
TestIDNToASCII(){
int32_t i;
UChar buf[MAX_DEST_SIZE];
UChar expected[MAX_DEST_SIZE];
UErrorCode status = U_ZERO_ERROR;
int32_t bufLen = 0;
UParseError parseError;
const char* testName="udina_IDNToASCII";
TestFunc func=uidna_IDNToASCII;
for(i=0;i< (int32_t)(sizeof(domainNames)/sizeof(domainNames[0])); i++){
bufLen = strlen(domainNames[i]);
bufLen = u_unescape(domainNames[i],buf, bufLen+1);
func(buf,bufLen,expected,MAX_DEST_SIZE, UIDNA_ALLOW_UNASSIGNED, &parseError,&status);
if(U_FAILURE(status)){
log_err( "%s failed to convert domainNames[%i].Error: %s \n",testName,i, u_errorName(status));
break;
}
testAPI(buf,expected,testName, FALSE,U_ZERO_ERROR, TRUE, TRUE, func);
/*test toASCII with all labels in the string*/
testAPI(buf,expected,testName, FALSE,U_ZERO_ERROR, FALSE, TRUE, func);
if(U_FAILURE(status)){
log_err( "%s failed to convert domainNames[%i].Error: %s \n",testName,i, u_errorName(status));
break;
}
}
}
static void
testCompareWithSrc(const UChar* s1, int32_t s1Len,
const UChar* s2, int32_t s2Len,
const char* testName, CompareFunc func,
UBool isEqual){
UErrorCode status = U_ZERO_ERROR;
int32_t retVal = func(s1,-1,s2,-1,UIDNA_DEFAULT,&status);
if(isEqual==TRUE && retVal !=0){
log_err("Did not get the expected result for %s with null termniated strings.\n",testName);
}
if(U_FAILURE(status)){
log_err( "%s null terminated source failed. Error: %s\n", testName,u_errorName(status));
}
status = U_ZERO_ERROR;
retVal = func(s1,-1,s2,-1,UIDNA_ALLOW_UNASSIGNED,&status);
if(isEqual==TRUE && retVal !=0){
log_err("Did not get the expected result for %s with null termniated strings with options set.\n", testName);
}
if(U_FAILURE(status)){
log_err( "%s null terminated source and options set failed. Error: %s\n",testName, u_errorName(status));
}
status = U_ZERO_ERROR;
retVal = func(s1,s1Len,s2,s2Len,UIDNA_DEFAULT,&status);
if(isEqual==TRUE && retVal !=0){
log_err("Did not get the expected result for %s with string length.\n",testName);
}
if(U_FAILURE(status)){
log_err( "%s with string length. Error: %s\n",testName, u_errorName(status));
}
status = U_ZERO_ERROR;
retVal = func(s1,s1Len,s2,s2Len,UIDNA_ALLOW_UNASSIGNED,&status);
if(isEqual==TRUE && retVal !=0){
log_err("Did not get the expected result for %s with string length and options set.\n",testName);
}
if(U_FAILURE(status)){
log_err( "%s with string length and options set. Error: %s\n", u_errorName(status), testName);
}
}
/*
 * Appends the NUL-terminated char string `src` to the NUL-terminated UChar
 * string `dst`, converting each char with a plain (UChar) cast.  The
 * terminating NUL is copied as well.  Returns `dst`.
 * The caller must make sure `dst` has room for the result.
 */
static UChar*
u_strcatChars(UChar *dst,
              const char *src)
{
    UChar *tail = dst;
    UChar unit;

    /* advance to the terminator of the existing string */
    while(*tail != 0) {
        tail++;
    }
    /* copy the second string, including its terminator */
    do {
        unit = (UChar)*src;
        src++;
        *tail = unit;
        tail++;
    } while(unit != 0);

    return dst;
}
static void
TestCompare(){
int32_t i;
const char* testName ="uidna_compare";
CompareFunc func = uidna_compare;
UChar www[] = {0x0057, 0x0057, 0x0057, 0x002E, 0x0000};
UChar com[] = {0x002E, 0x0043, 0x004F, 0x004D, 0x0000};
UChar buf[MAX_DEST_SIZE]={0x0057, 0x0057, 0x0057, 0x002E, 0x0000};
UChar source[MAX_DEST_SIZE]={0},
uni0[MAX_DEST_SIZE]={0},
uni1[MAX_DEST_SIZE]={0},
ascii0[MAX_DEST_SIZE]={0},
ascii1[MAX_DEST_SIZE]={0};
u_strcat(uni0,unicodeIn[0]);
u_strcat(uni0,com);
u_strcat(uni1,unicodeIn[1]);
u_strcat(uni1,com);
u_strcatChars(ascii0,asciiIn[0]);
u_strcat(ascii0,com);
u_strcatChars(ascii1,asciiIn[1]);
u_strcat(ascii1,com);
u_strcat(source, buf);
for(i=0;i< (int32_t)(sizeof(unicodeIn)/sizeof(unicodeIn[0])); i++){
UChar* src;
int32_t srcLen;
u_charsToUChars(asciiIn[i],buf+4, strlen(asciiIn[i]));
u_strcat(buf,com);
/* for every entry in unicodeIn array
prepend www. and append .com*/
source[4]=0;
u_strcat(source,unicodeIn[i]);
u_strcat(source,com);
/* a) compare it with itself*/
src = source;
srcLen = u_strlen(src);
testCompareWithSrc(src,srcLen,src,srcLen,testName, func, TRUE);
/* b) compare it with asciiIn equivalent */
/*testCompareWithSrc(src,srcLen,buf,u_strlen(buf),testName, func,TRUE);*/
/* c) compare it with unicodeIn not equivalent*/
if(i==0){
testCompareWithSrc(src,srcLen,uni1,u_strlen(uni1),testName, func,FALSE);
}else{
testCompareWithSrc(src,srcLen,uni0,u_strlen(uni0),testName, func,FALSE);
}
/* d) compare it with asciiIn not equivalent */
if(i==0){
testCompareWithSrc(src,srcLen,ascii1,u_strlen(ascii1),testName, func,FALSE);
}else{
testCompareWithSrc(src,srcLen,ascii0,u_strlen(ascii0),testName, func,FALSE);
}
}
}
#endif
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/

View file

@ -0,0 +1,342 @@
/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: nfsprep.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003jul11
* created by: Ram Viswanadha
*/
#if !UCONFIG_NO_IDNA
#include "nfsprep.h"
#include "cstring.h"
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
#define NFS4_MAX_BUFFER_SIZE 1000
#define PREFIX_SUFFIX_SEPARATOR 0x0040 /* '@' */
/*
 * Names of the StringPrep data files handed to usprep_open() by
 * nfs4_prepare().  The array is indexed with an NFS4ProfileState value, so
 * the order of the entries has to stay in sync with that enum.
 */
const char* NFS4DataFileNames[5] ={
"nfscss", /* NFS4_CS_PREP_CS */
"nfscsi", /* NFS4_CS_PREP_CI */
"nfscis", /* NFS4_CIS_PREP */
"nfsmxp", /* NFS4_MIXED_PREP_PREFIX */
"nfsmxs" /* NFS4_MIXED_PREP_SUFFIX */
};
/*
 * Runs one NFSv4 StringPrep profile over a UTF-8 string.
 *
 * The input is converted from UTF-8 to UTF-16, prepared with the profile
 * whose data file is NFS4DataFileNames[state], and converted back to UTF-8.
 * Stack buffers of NFS4_MAX_BUFFER_SIZE units are used first; a heap buffer
 * is allocated only when a step reports U_BUFFER_OVERFLOW_ERROR.
 *
 * @param src          UTF-8 input, must not be NULL
 * @param srcLength    length of src in bytes, or -1 if NUL-terminated
 * @param dest         output buffer (may be NULL only if destCapacity is 0)
 * @param destCapacity capacity of dest in bytes
 * @param state        selects the profile data file
 * @param parseError   passed through to usprep_prepare()
 * @param status       in/out ICU error code; nothing is done if it already
 *                     indicates a failure
 * @return the value of u_terminateChars() for the required output length;
 *         0 is returned directly for a NULL/failed status or bad arguments
 */
int32_t
nfs4_prepare( const char* src, int32_t srcLength,
              char* dest, int32_t destCapacity,
              NFS4ProfileState state,
              UParseError* parseError,
              UErrorCode* status){

    UChar b1Stack[NFS4_MAX_BUFFER_SIZE],
          b2Stack[NFS4_MAX_BUFFER_SIZE];
    char  b3Stack[NFS4_MAX_BUFFER_SIZE];

    /* initialize pointers to stack buffers */
    UChar *b1 = b1Stack, *b2 = b2Stack;
    char  *b3 = b3Stack;
    int32_t b1Len=0, b2Len=0, b3Len=0,
            b1Capacity = NFS4_MAX_BUFFER_SIZE,
            b2Capacity = NFS4_MAX_BUFFER_SIZE,
            b3Capacity = NFS4_MAX_BUFFER_SIZE,
            reqLength=0;

    UStringPrepProfile* profile = NULL;
    /* get the test data path */
    const char *testdatapath = NULL;

    if(status==NULL || U_FAILURE(*status)){
        return 0;
    }
    if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    /* state is used as an array index below, so reject anything out of range */
    if((int32_t)state < 0 || (int32_t)state >= LENGTHOF(NFS4DataFileNames)){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    testdatapath = loadTestData(status);
    if(U_FAILURE(*status)){
        goto CLEANUP;
    }

    /* convert the string from UTF-8 to UTF-16 */
    u_strFromUTF8(b1,b1Capacity,&b1Len,src,srcLength,status);
    if(*status == U_BUFFER_OVERFLOW_ERROR){

        /* reset the status */
        *status = U_ZERO_ERROR;

        b1 = (UChar*) malloc(b1Len * U_SIZEOF_UCHAR);
        if(b1==NULL){
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }

        b1Capacity = b1Len;
        u_strFromUTF8(b1, b1Capacity, &b1Len, src, srcLength, status);
    }
    if(U_FAILURE(*status)){
        goto CLEANUP;
    }

    /* open the profile */
    profile = usprep_open(testdatapath, NFS4DataFileNames[state], status);
    if(U_FAILURE(*status)){
        goto CLEANUP;
    }

    /* prepare the string */
    b2Len = usprep_prepare(profile, b1, b1Len, b2, b2Capacity, USPREP_NONE, parseError, status);
    if(*status == U_BUFFER_OVERFLOW_ERROR){
        *status = U_ZERO_ERROR;
        b2 = (UChar*) malloc(b2Len * U_SIZEOF_UCHAR);
        if(b2== NULL){
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }
        b2Capacity = b2Len;
        b2Len = usprep_prepare(profile, b1, b1Len, b2, b2Capacity, USPREP_NONE, parseError, status);
    }
    if(U_FAILURE(*status)){
        goto CLEANUP;
    }

    /* convert the string back to UTF-8 */
    u_strToUTF8(b3,b3Capacity, &b3Len, b2, b2Len, status);
    if(*status == U_BUFFER_OVERFLOW_ERROR){
        *status = U_ZERO_ERROR;
        b3 = (char*) malloc(b3Len);
        if(b3== NULL){
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }
        b3Capacity = b3Len;
        u_strToUTF8(b3,b3Capacity, &b3Len, b2, b2Len, status);
    }
    if(U_FAILURE(*status)){
        goto CLEANUP;
    }

    reqLength = b3Len;
    if(dest!=NULL && reqLength <= destCapacity){
        memmove(dest, b3, reqLength);
    }

CLEANUP:
    /* free(NULL) is a no-op, so a failed malloc needs no special case */
    if(b1!=b1Stack){
        free(b1);
    }
    if(b2!=b2Stack){
        free(b2);
    }
    if(b3!=b3Stack){
        free(b3);
    }
    /* release the profile opened above; it was leaked on every call before */
    if(profile != NULL){
        usprep_close(profile);
    }

    return u_terminateChars(dest, destCapacity, reqLength, status);
}
/*
 * Fills a UParseError with the offset `pos` and the text surrounding it.
 *
 * preContext receives the UChars immediately before `pos`, postContext the
 * UChars following the code point at `pos`.  Both are always NUL-terminated.
 * If `pos` equals `rulesLen` (and the text is not empty) it is moved back by
 * one unit first.  Line numbers are not used and are reported as 0.
 *
 * @param rules      the text that was being processed
 * @param pos        offset of the error in `rules`
 * @param rulesLen   length of `rules` in UChars
 * @param parseError structure to fill; nothing happens if it is NULL
 */
static void
syntaxError( const UChar* rules,
             int32_t pos,
             int32_t rulesLen,
             UParseError* parseError){
    int32_t start, stop;

    if(parseError == NULL){
        return;
    }
    if(pos == rulesLen && rulesLen >0){
        pos--;
    }
    parseError->offset = pos;
    parseError->line = 0 ; /* we are not using line numbers */

    /*
     * for pre-context
     * At most U_PARSE_CONTEXT_LEN-1 units are copied so that the terminator
     * still fits.  The comparison has to be '<': with '<=' a pos equal to
     * U_PARSE_CONTEXT_LEN copied U_PARSE_CONTEXT_LEN units and wrote the
     * terminator one element past them.
     */
    start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
    stop = pos;
    u_memcpy(parseError->preContext,rules+start,stop-start);
    /* null terminate the buffer */
    parseError->preContext[stop-start] = 0;

    /* for post-context: skip the code point at pos itself */
    start = pos;
    if(start<rulesLen) {
        U16_FWD_1(rules, start, rulesLen);
    }

    stop = ((pos+U_PARSE_CONTEXT_LEN)<= rulesLen )? (pos+(U_PARSE_CONTEXT_LEN)) :
                                                      rulesLen;
    if(start < stop){
        u_memcpy(parseError->postContext,rules+start,stop-start);
        /* null terminate the buffer */
        parseError->postContext[stop-start]= 0;
    }else{
        /* nothing follows the error position; do not leave the buffer uninitialized */
        parseError->postContext[0] = 0;
    }
}
/*
 * Special identifiers that must be used without a domain suffix.
 * The array is searched with a binary search (findStringIndex), so the
 * entries MUST stay sorted in strcmp() order and each entry needs its own
 * comma: a missing comma silently concatenates two adjacent literals.
 * NOTE(review): the identifier list quoted in the test sources also names
 * "SERVICE"; it is not part of this table - confirm whether it should be.
 */
static const char* special_prefixes[]={
    "ANONYMOUS",
    "AUTHENTICATED",
    "BATCH",
    "DIALUP",
    "EVERYONE",
    "GROUP",
    "INTERACTIVE",
    "NETWORK",
    "OWNER",
};
/*
 * Binary search for an exact match in a sorted array of C strings.
 *
 * `target` does not have to be NUL-terminated: only its first `targetLen`
 * bytes are considered.  An entry matches only if it is exactly
 * `targetLen` bytes long as far as those bytes are concerned; an entry of
 * which `target` is merely a proper prefix (for example "A" against
 * "ANONYMOUS", or an empty target) is NOT a match.
 *
 * @param sortedArr    array sorted in strcmp() order
 * @param sortedArrLen number of entries in sortedArr
 * @param target       bytes to look for
 * @param targetLen    number of bytes of target to compare
 * @return index of the matching entry, or -1 if there is none
 */
static int
findStringIndex(const char* const *sortedArr, int32_t sortedArrLen, const char* target, int32_t targetLen){

    int left, middle, right,rc;

    if(sortedArr == NULL || target == NULL || targetLen < 0){
        return -1;
    }

    left =0;
    right= sortedArrLen-1;

    while(left <= right){
        middle = left + (right-left)/2;
        rc=strncmp(sortedArr[middle],target, (size_t)targetLen);

        /*
         * The first targetLen bytes agree but the entry goes on: the target
         * is only a prefix of the entry, so the entry sorts after it.
         */
        if(rc==0 && strlen(sortedArr[middle]) > (size_t)targetLen){
            rc = 1;
        }

        if(rc<0){
            left = middle+1;
        }else if(rc >0){
            right = middle -1;
        }else{
            return middle;
        }
    }
    return -1;
}
/*
 * Splits `src` at the first PREFIX_SUFFIX_SEPARATOR ('@').
 *
 * The prefix always starts at `src` and includes the separator when one is
 * present.  The suffix is whatever follows the separator; *suffix is NULL
 * when there is no separator or nothing follows it.
 *
 * A special identifier (see special_prefixes) must not be followed by a
 * suffix; in that case *status is set to U_PARSE_ERROR.
 *
 * @param src        input bytes
 * @param srcLength  number of bytes in src
 * @param prefix     receives the start of the prefix (== src)
 * @param prefixLen  receives the prefix length, separator included
 * @param suffix     receives the start of the suffix or NULL
 * @param suffixLen  receives srcLength - *prefixLen
 * @param status     set to U_PARSE_ERROR for "SPECIAL@suffix" input
 */
static void
getPrefixSuffix(const char *src, int32_t srcLength,
                const char **prefix, int32_t *prefixLen,
                const char **suffix, int32_t *suffixLen,
                UErrorCode *status){

    int32_t i=0;
    *prefix = src;
    /* do not rely on the caller having cleared the out-parameter */
    *suffix = NULL;

    while(i<srcLength){
        if(src[i] == PREFIX_SUFFIX_SEPARATOR){
            i++;/* the prefix contains the separator */
            if(i < srcLength){
                /* something follows the separator */
                *suffix = src + i;
            }
            break;
        }
        i++;
    }
    *prefixLen = i;
    *suffixLen = srcLength - i;

    /*
     * special prefixes must not be followed by suffixes!
     * The suffix test comes first: only then is it certain that the prefix
     * ends in a separator, so that *prefixLen-1 is the (non-negative) length
     * of the identifier in front of it.
     */
    if((*suffix != NULL) &&
       (findStringIndex(special_prefixes,LENGTHOF(special_prefixes), *prefix, *prefixLen-1) != -1)){
        *status = U_PARSE_ERROR;
        return;
    }
}
/*
 * Prepares a "prefix@suffix" style UTF-8 string.
 *
 * The input is split by getPrefixSuffix(); the prefix (including the '@')
 * is prepared with NFS4_MIXED_PREP_PREFIX and the suffix, if there is one,
 * with NFS4_MIXED_PREP_SUFFIX.  The two results are concatenated in dest.
 *
 * @param src          UTF-8 input, must not be NULL
 * @param srcLength    length of src in bytes, or -1 if NUL-terminated
 * @param dest         output buffer (may be NULL only if destCapacity is 0)
 * @param destCapacity capacity of dest in bytes
 * @param parseError   passed through to nfs4_prepare()
 * @param status       in/out ICU error code; U_PARSE_ERROR is reported when
 *                     a special identifier is followed by a suffix
 * @return the value of u_terminateChars() for the required output length;
 *         0 is returned directly for a NULL/failed status or bad arguments
 */
int32_t
nfs4_mixed_prepare( const char* src, int32_t srcLength,
                    char* dest, int32_t destCapacity,
                    UParseError* parseError,
                    UErrorCode* status){

    const char *prefix = NULL, *suffix = NULL;
    int32_t prefixLen=0, suffixLen=0;
    char  pStack[NFS4_MAX_BUFFER_SIZE],
          sStack[NFS4_MAX_BUFFER_SIZE];
    char *p=pStack, *s=sStack;
    int32_t pLen=0, sLen=0, reqLen=0,
            pCapacity = NFS4_MAX_BUFFER_SIZE,
            sCapacity = NFS4_MAX_BUFFER_SIZE;

    if(status==NULL || U_FAILURE(*status)){
        return 0;
    }
    if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(srcLength == -1){
        srcLength = uprv_strlen(src);
    }

    getPrefixSuffix(src, srcLength, &prefix, &prefixLen, &suffix, &suffixLen, status);
    if(U_FAILURE(*status)){
        goto CLEANUP;
    }

    /* prepare the prefix */
    pLen = nfs4_prepare(prefix, prefixLen, p, pCapacity, NFS4_MIXED_PREP_PREFIX, parseError, status);
    if(*status == U_BUFFER_OVERFLOW_ERROR){
        *status = U_ZERO_ERROR;
        p = (char*) malloc(pLen);
        if(p == NULL){
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }
        pCapacity = pLen;
        pLen = nfs4_prepare(prefix, prefixLen, p, pCapacity, NFS4_MIXED_PREP_PREFIX, parseError, status);
    }
    if(U_FAILURE(*status)){
        goto CLEANUP;
    }

    /* prepare the suffix */
    if(suffix != NULL){
        sLen = nfs4_prepare(suffix, suffixLen, s, sCapacity, NFS4_MIXED_PREP_SUFFIX, parseError, status);
        if(*status == U_BUFFER_OVERFLOW_ERROR){
            *status = U_ZERO_ERROR;
            /* size the buffer for the suffix (sLen), not for the prefix */
            s = (char*) malloc(sLen);
            if(s == NULL){
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto CLEANUP;
            }
            sCapacity = sLen;
            sLen = nfs4_prepare(suffix, suffixLen, s, sCapacity, NFS4_MIXED_PREP_SUFFIX, parseError, status);
        }
        if(U_FAILURE(*status)){
            goto CLEANUP;
        }
    }

    reqLen = pLen+sLen;
    if(dest != NULL && reqLen <= destCapacity){
        memmove(dest, p, pLen);
        memmove(dest+pLen, s, sLen);
    }

CLEANUP:
    /* free(NULL) is a no-op, so a failed malloc needs no special case */
    if(p != pStack){
        free(p);
    }
    if(s != sStack){
        free(s);
    }

    return u_terminateChars(dest, destCapacity, reqLen, status);
}
/*
 * Convenience wrapper: runs nfs4_prepare() with the NFS4_CIS_PREP profile.
 * All arguments and the return value are those of nfs4_prepare().
 */
int32_t
nfs4_cis_prepare( const char* src, int32_t srcLength,
                  char* dest, int32_t destCapacity,
                  UParseError* parseError,
                  UErrorCode* status){

    const NFS4ProfileState profileState = NFS4_CIS_PREP;
    int32_t preparedLength = nfs4_prepare(src, srcLength,
                                          dest, destCapacity,
                                          profileState,
                                          parseError, status);
    return preparedLength;
}
/*
 * Convenience wrapper: runs nfs4_prepare() with NFS4_CS_PREP_CS when
 * isCaseSensitive is set and with NFS4_CS_PREP_CI otherwise.
 * The remaining arguments and the return value are those of nfs4_prepare().
 */
int32_t
nfs4_cs_prepare( const char* src, int32_t srcLength,
                 char* dest, int32_t destCapacity,
                 UBool isCaseSensitive,
                 UParseError* parseError,
                 UErrorCode* status){

    const NFS4ProfileState profileState =
        isCaseSensitive ? NFS4_CS_PREP_CS : NFS4_CS_PREP_CI;

    return nfs4_prepare(src, srcLength, dest, destCapacity, profileState, parseError, status);
}
#endif
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/

View file

@ -0,0 +1,113 @@
/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: nfsprep.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003jul11
* created by: Ram Viswanadha
*/
#ifndef _NFSPREP_H
#define _NFSPREP_H
#if !UCONFIG_NO_IDNA
#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "ustr_imp.h"
#include "cintltst.h"
#include "unicode/usprep.h"
#include <stdlib.h>
#include <string.h>
/*
 * Selects the StringPrep profile used by nfs4_prepare().
 * The numeric value is used as an index into the NFS4DataFileNames array in
 * nfsprep.c, so enumerators must not be reordered without updating it.
 */
enum NFS4ProfileState{
NFS4_CS_PREP_CS, /* used by nfs4_cs_prepare() when isCaseSensitive is set */
NFS4_CS_PREP_CI, /* used by nfs4_cs_prepare() when isCaseSensitive is not set */
NFS4_CIS_PREP, /* used by nfs4_cis_prepare() */
NFS4_MIXED_PREP_PREFIX, /* used by nfs4_mixed_prepare() for the part up to and including '@' */
NFS4_MIXED_PREP_SUFFIX /* used by nfs4_mixed_prepare() for the part after '@' */
};
typedef enum NFS4ProfileState NFS4ProfileState;
/**
 * Prepares the source UTF-8 string for use in file names and
 * returns UTF-8 string on output.
 * @param src The UTF-8 input string; must not be NULL.
 * @param srcLength Length of src in bytes, or -1 if src is NUL-terminated.
 * @param dest Buffer that receives the prepared UTF-8 string; may be NULL
 * only when destCapacity is 0.
 * @param destCapacity Capacity of dest in bytes; must not be negative.
 * @param state Selects the StringPrep profile to apply.
 * @param parseError Receives error position details from the preparation step.
 * @param status ICU error code, in/out. Must not indicate a failure on entry.
 * U_ILLEGAL_ARGUMENT_ERROR is set for invalid arguments.
 * @return The result of terminating dest at the required output length
 * (presumably that length - see u_terminateChars); 0 for invalid arguments.
 */
int32_t
nfs4_prepare(const char* src, int32_t srcLength,
char* dest, int32_t destCapacity,
NFS4ProfileState state,
UParseError* parseError,
UErrorCode* status);
/**
 * Prepares a UTF-8 string of the form "prefix@suffix".
 * The part up to and including the first '@' is prepared with
 * NFS4_MIXED_PREP_PREFIX, the remainder (if any) with NFS4_MIXED_PREP_SUFFIX,
 * and the two results are concatenated.
 * @param src The UTF-8 input string; must not be NULL.
 * @param srcLength Length of src in bytes, or -1 if src is NUL-terminated.
 * @param dest Buffer that receives the prepared UTF-8 string; may be NULL
 * only when destCapacity is 0.
 * @param destCapacity Capacity of dest in bytes; must not be negative.
 * @param parseError Receives error position details from the preparation step.
 * @param status ICU error code, in/out. U_PARSE_ERROR is set when a special
 * identifier such as "OWNER@" is followed by a suffix.
 * @return The result of terminating dest at the required output length
 * (presumably that length - see u_terminateChars); 0 for invalid arguments.
 */
int32_t
nfs4_mixed_prepare( const char* src, int32_t srcLength,
char* dest, int32_t destCapacity,
UParseError* parseError,
UErrorCode* status);
/**
 * Prepares a UTF-8 string with the NFS4_CIS_PREP profile.
 * Equivalent to calling nfs4_prepare() with state NFS4_CIS_PREP.
 * @param src The UTF-8 input string.
 * @param srcLength Length of src in bytes, or -1 if src is NUL-terminated.
 * @param dest Buffer that receives the prepared UTF-8 string.
 * @param destCapacity Capacity of dest in bytes.
 * @param parseError Receives error position details from the preparation step.
 * @param status ICU error code, in/out.
 * @return The value returned by nfs4_prepare().
 */
int32_t
nfs4_cis_prepare( const char* src, int32_t srcLength,
char* dest, int32_t destCapacity,
UParseError* parseError,
UErrorCode* status);
/**
 * Prepares a UTF-8 string with one of the two "cs" profiles.
 * Equivalent to calling nfs4_prepare() with state NFS4_CS_PREP_CS when
 * isCaseSensitive is set, and with NFS4_CS_PREP_CI otherwise.
 * @param src The UTF-8 input string.
 * @param srcLength Length of src in bytes, or -1 if src is NUL-terminated.
 * @param dest Buffer that receives the prepared UTF-8 string.
 * @param destCapacity Capacity of dest in bytes.
 * @param isCaseSensitive Chooses between the NFS4_CS_PREP_CS and
 * NFS4_CS_PREP_CI profiles.
 * @param parseError Receives error position details from the preparation step.
 * @param status ICU error code, in/out.
 * @return The value returned by nfs4_prepare().
 */
int32_t
nfs4_cs_prepare( const char* src, int32_t srcLength,
char* dest, int32_t destCapacity,
UBool isCaseSensitive,
UParseError* parseError,
UErrorCode* status);
#endif
#endif
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/

View file

@ -0,0 +1,450 @@
/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: spreptst.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003jul11
* created by: Ram Viswanadha
*/
#if !UCONFIG_NO_IDNA
#include <stdlib.h>
#include <string.h>
#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "unicode/usprep.h"
#include "cintltst.h"
#include "nfsprep.h"
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
void addUStringPrepTest(TestNode** root);
void doStringPrepTest(const char* binFileName, const char* txtFileName,
int32_t options, UErrorCode* errorCode);
static void Test_nfs4_cs_prep_data(void);
static void Test_nfs4_cis_prep_data(void);
static void Test_nfs4_mixed_prep_data(void);
static void Test_nfs4_cs_prep(void);
static void Test_nfs4_cis_prep(void);
static void Test_nfs4_mixed_prep(void);
/*
 * Registers the StringPrep tests of this file under "spreptst/".
 * The tests are added in the order of the table below.
 */
void
addUStringPrepTest(TestNode** root)
{
    static const struct {
        void (*testFunction)(void);
        const char* path;
    } registrations[] = {
        { &Test_nfs4_cs_prep_data,    "spreptst/Test_nfs4_cs_prep_data" },
        { &Test_nfs4_cis_prep_data,   "spreptst/Test_nfs4_cis_prep_data" },
        { &Test_nfs4_mixed_prep_data, "spreptst/Test_nfs4_mixed_prep_data" },
        /*{ &Test_nfs4_cs_prep,       "spreptst/Test_nfs4_cs_prep" },*/
        { &Test_nfs4_cis_prep,        "spreptst/Test_nfs4_cis_prep" },
        { &Test_nfs4_mixed_prep,      "spreptst/Test_nfs4_mixed_prep" }
    };
    int32_t n;

    for(n = 0; n < LENGTHOF(registrations); n++){
        addTest(root, registrations[n].testFunction, registrations[n].path);
    }
}
/*
 * Checks the compiled "nfscsi" and "nfscss" profiles against their source
 * text files.  Each run starts with its own fresh error code.
 */
static void
Test_nfs4_cs_prep_data(void){
    UErrorCode ciStatus = U_ZERO_ERROR;
    UErrorCode csStatus = U_ZERO_ERROR;

    /* case-insensitive profile */
    log_verbose("Testing nfs4_cs_prep_ci.txt\n");
    doStringPrepTest("nfscsi","nfs4_cs_prep_ci.txt", USPREP_NONE, &ciStatus);

    /* case-sensitive profile */
    log_verbose("Testing nfs4_cs_prep_cs.txt\n");
    doStringPrepTest("nfscss","nfs4_cs_prep_cs.txt", USPREP_NONE, &csStatus);
}
/*
 * Checks the compiled "nfscis" profile against its source text file.
 */
static void
Test_nfs4_cis_prep_data(void){
    UErrorCode status;

    status = U_ZERO_ERROR;
    log_verbose("Testing nfs4_cis_prep.txt\n");
    doStringPrepTest("nfscis","nfs4_cis_prep.txt", USPREP_NONE, &status);
}
/*
 * Checks the compiled "nfsmxs" (suffix) and "nfsmxp" (prefix) profiles
 * against their source text files.  Each run starts with its own fresh
 * error code.
 */
static void
Test_nfs4_mixed_prep_data(void){
    UErrorCode suffixStatus = U_ZERO_ERROR;
    UErrorCode prefixStatus = U_ZERO_ERROR;

    /* suffix profile */
    log_verbose("Testing nfs4_mixed_prep_s.txt\n");
    doStringPrepTest("nfsmxs","nfs4_mixed_prep_s.txt", USPREP_NONE, &suffixStatus);

    /* prefix profile */
    log_verbose("Testing nfs4_mixed_prep_p.txt\n");
    doStringPrepTest("nfsmxp","nfs4_mixed_prep_p.txt", USPREP_NONE, &prefixStatus);
}
/*
 * Table of input/expected-output pairs consumed by Test_nfs4_cis_prep().
 * Every entry is run through nfs4_cis_prepare(); the resulting status is
 * compared with expectedStatus and, on success, the output with `out`.
 * The `comment` and `profile` fields are not read by that test.
 */
static struct ConformanceTestCases
{
const char *comment; /* human readable description of the case */
const char *in; /* UTF-8 input */
const char *out; /* expected UTF-8 output; only compared when preparation succeeds */
const char *profile; /* profile name, informational */
UErrorCode expectedStatus; /* status expected from nfs4_cis_prepare() */
}
conformanceTestCases[] =
{
{
"Case folding ASCII U+0043 U+0041 U+0046 U+0045",
"\x43\x41\x46\x45", "\x63\x61\x66\x65",
"nfs4_cis_prep",
U_ZERO_ERROR
},
{
"Case folding 8bit U+00DF (german sharp s)",
"\xC3\x9F", "\x73\x73",
"nfs4_cis_prep",
U_ZERO_ERROR
},
{
"Non-ASCII multibyte space character U+1680",
"\xE1\x9A\x80", NULL,
"nfs4_cis_prep",
U_STRINGPREP_PROHIBITED_ERROR
},
{
"Non-ASCII 8bit control character U+0085",
"\xC2\x85", NULL,
"nfs4_cis_prep",
U_STRINGPREP_PROHIBITED_ERROR
},
{
"Non-ASCII multibyte control character U+180E",
"\xE1\xA0\x8E", NULL,
"nfs4_cis_prep",
U_STRINGPREP_PROHIBITED_ERROR
},
{
"Non-ASCII control character U+1D175",
"\xF0\x9D\x85\xB5", NULL,
"nfs4_cis_prep",
U_STRINGPREP_PROHIBITED_ERROR
},
{
"Plane 0 private use character U+F123",
"\xEF\x84\xA3", NULL,
"nfs4_cis_prep",
U_STRINGPREP_PROHIBITED_ERROR
},
{
"Plane 15 private use character U+F1234",
"\xF3\xB1\x88\xB4", NULL,
"nfs4_cis_prep",
U_STRINGPREP_PROHIBITED_ERROR
},
{
"Plane 16 private use character U+10F234",
"\xF4\x8F\x88\xB4", NULL,
"nfs4_cis_prep",
U_STRINGPREP_PROHIBITED_ERROR
},
{
"Non-character code point U+8FFFE",
"\xF2\x8F\xBF\xBE", NULL,
"nfs4_cis_prep",
U_STRINGPREP_PROHIBITED_ERROR
},
{
"Non-character code point U+10FFFF",
"\xF4\x8F\xBF\xBF", NULL,
"nfs4_cis_prep",
U_STRINGPREP_PROHIBITED_ERROR
},
/*
{
"Surrogate code U+DF42",
"\xED\xBD\x82", NULL, "nfs4_cis_prep", UIDNA_DEFAULT,
U_STRINGPREP_PROHIBITED_ERROR
},
*/
{
"Non-plain text character U+FFFD",
"\xEF\xBF\xBD", NULL,
"nfs4_cis_prep",
U_STRINGPREP_PROHIBITED_ERROR
},
{
"Ideographic description character U+2FF5",
"\xE2\xBF\xB5", NULL,
"nfs4_cis_prep",
U_STRINGPREP_PROHIBITED_ERROR
},
{
"Display property character U+0341",
"\xCD\x81", "\xCC\x81",
"nfs4_cis_prep", U_ZERO_ERROR
},
{
"Left-to-right mark U+200E",
"\xE2\x80\x8E", "\xCC\x81",
"nfs4_cis_prep",
U_STRINGPREP_PROHIBITED_ERROR
},
{
"Deprecated U+202A",
"\xE2\x80\xAA", "\xCC\x81",
"nfs4_cis_prep",
U_STRINGPREP_PROHIBITED_ERROR
},
{
"Language tagging character U+E0001",
"\xF3\xA0\x80\x81", "\xCC\x81",
"nfs4_cis_prep",
U_STRINGPREP_PROHIBITED_ERROR
},
{
"Language tagging character U+E0042",
"\xF3\xA0\x81\x82", NULL,
"nfs4_cis_prep",
U_STRINGPREP_PROHIBITED_ERROR
},
{
"Bidi: RandALCat character U+05BE and LCat characters",
"\x66\x6F\x6F\xD6\xBE\x62\x61\x72", NULL,
"nfs4_cis_prep",
U_STRINGPREP_CHECK_BIDI_ERROR
},
{
"Bidi: RandALCat character U+FD50 and LCat characters",
"\x66\x6F\x6F\xEF\xB5\x90\x62\x61\x72", NULL,
"nfs4_cis_prep",
U_STRINGPREP_CHECK_BIDI_ERROR
},
{
"Bidi: RandALCat character U+FB38 and LCat characters",
"\x66\x6F\x6F\xEF\xB9\xB6\x62\x61\x72", "\x66\x6F\x6F \xd9\x8e\x62\x61\x72",
"nfs4_cis_prep",
U_ZERO_ERROR
},
{ "Bidi: RandALCat without trailing RandALCat U+0627 U+0031",
"\xD8\xA7\x31", NULL,
"nfs4_cis_prep",
U_STRINGPREP_CHECK_BIDI_ERROR
},
{
"Bidi: RandALCat character U+0627 U+0031 U+0628",
"\xD8\xA7\x31\xD8\xA8", "\xD8\xA7\x31\xD8\xA8",
"nfs4_cis_prep",
U_ZERO_ERROR
},
{
"Unassigned code point U+E0002",
"\xF3\xA0\x80\x82", NULL,
"nfs4_cis_prep",
U_STRINGPREP_UNASSIGNED_ERROR
},
/* // Invalid UTF-8
{
"Larger test (shrinking)",
"X\xC2\xAD\xC3\xDF\xC4\xB0\xE2\x84\xA1\x6a\xcc\x8c\xc2\xa0\xc2"
"\xaa\xce\xb0\xe2\x80\x80", "xssi\xcc\x87""tel\xc7\xb0 a\xce\xb0 ",
"nfs4_cis_prep",
U_ZERO_ERROR
},
{
"Larger test (expanding)",
"X\xC3\xDF\xe3\x8c\x96\xC4\xB0\xE2\x84\xA1\xE2\x92\x9F\xE3\x8c\x80",
"xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\x88"
"\xe3\x83\xab""i\xcc\x87""tel\x28""d\x29\xe3\x82\xa2\xe3\x83\x91"
"\xe3\x83\xbc\xe3\x83\x88"
"nfs4_cis_prep",
U_ZERO_ERROR
},
*/
};
static void Test_nfs4_cis_prep(void){
int32_t i=0;
for(i=0;i< (int32_t)(sizeof(conformanceTestCases)/sizeof(conformanceTestCases[0]));i++){
const char* src = conformanceTestCases[i].in;
UErrorCode status = U_ZERO_ERROR;
UParseError parseError;
UErrorCode expectedStatus = conformanceTestCases[i].expectedStatus;
const char* expectedDest = conformanceTestCases[i].out;
char* dest = NULL;
int32_t destLen = 0;
destLen = nfs4_cis_prepare(src , strlen(src), dest, destLen, &parseError, &status);
if(status == U_BUFFER_OVERFLOW_ERROR){
status = U_ZERO_ERROR;
dest = (char*) malloc(++destLen);
destLen = nfs4_cis_prepare( src , strlen(src), dest, destLen, &parseError, &status);
}
if(expectedStatus != status){
log_err("Did not get the expected status for nfs4_cis_prep at index %i. Expected: %s Got: %s\n",i, u_errorName(expectedStatus), u_errorName(status));
}
if(U_SUCCESS(status) && (strcmp(expectedDest,dest) !=0)){
log_err("Did not get the expected output for nfs4_cis_prep at index %i.\n", i);
}
}
}
/*
There are several special identifiers ("who") which need to be
understood universally, rather than in the context of a particular
DNS domain. Some of these identifiers cannot be understood when an
NFS client accesses the server, but have meaning when a local process
accesses the file. The ability to display and modify these
permissions is permitted over NFS, even if none of the access methods
on the server understands the identifiers.
Who Description
_______________________________________________________________
"OWNER" The owner of the file.
"GROUP" The group associated with the file.
"EVERYONE" The world.
"INTERACTIVE" Accessed from an interactive terminal.
"NETWORK" Accessed via the network.
"DIALUP" Accessed as a dialup user to the server.
"BATCH" Accessed from a batch job.
"ANONYMOUS" Accessed without any authentication.
"AUTHENTICATED" Any authenticated user (opposite of
ANONYMOUS)
"SERVICE" Access from a system service.
To avoid conflict, these special identifiers are distinguish by an
appended "@" and should appear in the form "xxxx@" (note: no domain
name after the "@"). For example: ANONYMOUS@.
*/
/*
 * Input for Test_nfs4_mixed_prep(): "prefix@suffix" names.
 * The strings are kept in escaped form (note the doubled backslashes, which
 * put a literal \uXXXX sequence into the C string); the test converts them
 * to UTF-8 with unescapeData() before preparing them.  Every entry is
 * expected to be prepared without error.
 */
static const char* mixed_prep_data[] ={
"OWNER@",
"GROUP@",
"EVERYONE@",
"INTERACTIVE@",
"NETWORK@",
"DIALUP@",
"BATCH@",
"ANONYMOUS@",
"AUTHENTICATED@",
"\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D@slip129-37-118-146.nc.us.ibm.net",
"\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d@saratoga.pe.utexas.edu",
"\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e@dial-120-45.ots.utexas.edu",
"\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f@woo-085.dorms.waller.net",
"\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928@hd30-049.hil.compuserve.com",
"\\u0935\\u093f\\u0937\\u093e\\u0926@pem203-31.pe.ttu.edu",
"\\u092f\\u094b\\u0917@56K-227.MaxTNT3.pdq.net",
"\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930@dial-36-2.ots.utexas.edu",
"\\u0909\\u0935\\u093E\\u091A\\u0943@slip129-37-23-152.ga.us.ibm.net",
"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947@ts45ip119.cadvision.com",
"\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947@sdn-ts-004txaustP05.dialsprint.net",
"\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e@bar-tnt1s66.erols.com",
"\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903@101.st-louis-15.mo.dial-access.att.net",
"\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903@h92-245.Arco.COM",
"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935@dial-13-2.ots.utexas.edu",
"\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924@net-redynet29.datamarkets.com.ar",
"\\u0938\\u0902\\u091c\\u0935@ccs-shiva28.reacciun.net.ve",
"\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d@7.houston-11.tx.dial-access.att.net",
"\\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27@ingw129-37-120-26.mo.us.ibm.net",
"\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d@dialup6.austintx.com",
"\\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41@dns2.tpao.gov.tr",
"\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d@slip129-37-119-194.nc.us.ibm.net",
"\\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26@cs7.dillons.co.uk.203.119.193.in-addr.arpa",
"\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d@swprd1.innovplace.saskatoon.sk.ca",
"\\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26@bikini.bologna.maraut.it",
"\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d@node91.subnet159-198-79.baxter.com",
"\\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24@cust19.max5.new-york.ny.ms.uu.net",
"\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30@balexander.slip.andrew.cmu.edu",
"\\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32@pool029.max2.denver.co.dynip.alter.net",
"\\u0c30\\u0c35\\u0c3f@cust49.max9.new-york.ny.ms.uu.net",
"\\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d@s61.abq-dialin2.hollyberry.com",
"\\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27@\\u0917\\u0928\\u0947\\u0936.sanjose.ibm.com",
"\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f@www.\\u00E0\\u00B3\\u00AF.com",
"\\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32@www.\\u00C2\\u00A4.com",
"\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D@www.\\u00C2\\u00A3.com",
"\\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f@\\u0025",
"\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d@\\u005C\\u005C",
"\\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f@www.\\u0021.com",
"test@www.\\u0024.com",
"help@\\u00C3\\u00BC.com",
};
#define MAX_BUFFER_SIZE 1000
/*
 * Converts an escaped ASCII string (\uXXXX etc.) to UTF-8.
 *
 * The string is unescaped to UTF-16 with u_unescape() and then converted
 * with u_strToUTF8(), both through stack buffers of MAX_BUFFER_SIZE units.
 * The result is NOT NUL-terminated in dest; use the returned length.
 *
 * @param src          NUL-terminated escaped input
 * @param srcLen       unused (u_unescape() takes a NUL-terminated string)
 * @param dest         buffer that receives the UTF-8 bytes
 * @param destCapacity capacity of dest in bytes
 * @param status       in/out ICU error code; set to
 *                     U_BUFFER_OVERFLOW_ERROR when an intermediate buffer or
 *                     dest is too small
 * @return number of UTF-8 bytes produced (or needed, when dest is too
 *         small); 0 if nothing could be converted
 */
static int32_t
unescapeData(const char* src, int32_t srcLen,
             char* dest, int32_t destCapacity,
             UErrorCode* status){

    UChar b1Stack[MAX_BUFFER_SIZE];
    char  b2Stack[MAX_BUFFER_SIZE];
    int32_t b1Capacity = MAX_BUFFER_SIZE,
            b2Capacity = MAX_BUFFER_SIZE,
            b1Len      = 0,
            b2Len      = 0;

    (void)srcLen;

    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    b1Len = u_unescape(src,b1Stack,b1Capacity);
    if(b1Len > b1Capacity){
        /* the unescaped text did not fit; b1Stack holds only a part of it */
        *status = U_BUFFER_OVERFLOW_ERROR;
        return 0;
    }

    u_strToUTF8(b2Stack, b2Capacity, &b2Len, b1Stack, b1Len, status);
    if(U_FAILURE(*status)){
        return b2Len;
    }

    if(b2Len > 0){
        if(dest == NULL || b2Len > destCapacity){
            /* report the problem instead of silently leaving dest untouched */
            *status = U_BUFFER_OVERFLOW_ERROR;
            return b2Len;
        }
        memmove(dest, b2Stack, b2Len);
    }

    return b2Len;
}
static void
Test_nfs4_mixed_prep(void){
int32_t i=0;
char src[MAX_BUFFER_SIZE];
int32_t srcLen;
for(i=0; i< LENGTHOF(mixed_prep_data); i++){
int32_t destLen=0;
char* dest = NULL;
UErrorCode status = U_ZERO_ERROR;
UParseError parseError;
srcLen = unescapeData(mixed_prep_data[i], strlen(mixed_prep_data[i]), src, MAX_BUFFER_SIZE, &status);
if(U_FAILURE(status)){
log_err("Conversion of data at index %i failed. Error: %s\n", i, u_errorName(status));
continue;
}
destLen = nfs4_mixed_prepare(src, srcLen, NULL, 0, &parseError, &status);
if(status == U_BUFFER_OVERFLOW_ERROR){
status = U_ZERO_ERROR;
dest = (char*)malloc(++destLen);
destLen = nfs4_mixed_prepare(src, srcLen, dest, destLen, &parseError, &status);
}
free(dest);
if(U_FAILURE(status)){
log_err("Preparation of string at index %i failed. Error: %s\n", i, u_errorName(status));
continue;
}
}
/* test the error condition */
{
const char* src = "OWNER@oss.software.ibm.com";
char dest[MAX_BUFFER_SIZE];
UErrorCode status = U_ZERO_ERROR;
UParseError parseError;
int32_t destLen = nfs4_mixed_prepare(src, srcLen, dest, MAX_BUFFER_SIZE, &parseError, &status);
if(status != U_PARSE_ERROR){
log_err("Did not get the expected error.Expected: %s Got: %s\n", u_errorName(U_PARSE_ERROR), u_errorName(status));
}
}
}
#endif
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/

View file

@ -0,0 +1,302 @@
/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: spreptst.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003jul11
* created by: Ram Viswanadha
*/
#if !UCONFIG_NO_IDNA
#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "cintltst.h"
#include "unicode/usprep.h"
#include "sprpimpl.h"
#include "uparse.h"
#include "cmemory.h"
#include "ustr_imp.h"
#include "cstring.h"
static void
parseMappings(const char *filename, UStringPrepProfile* data, UBool reportError, UErrorCode *pErrorCode);
static void
compareMapping(UStringPrepProfile* data, uint32_t codepoint, uint32_t* mapping, int32_t mapLength,
UStringPrepType option);
static void
compareFlagsForRange(UStringPrepProfile* data, uint32_t start, uint32_t end,UStringPrepType option);
/*
 * Line callback for u_parseDelimitedFile(): verifies one line of a profile
 * source file against the loaded profile.
 *
 * Field 0 holds a code point or code point range, field 1 the mapping and
 * field 2 the type name.  Depending on the type name the range is checked
 * with compareFlagsForRange() or the mapping with compareMapping().
 * A line with an unknown type sets U_INVALID_FORMAT_ERROR; a line whose
 * code points cannot be parsed is not compared.
 *
 * @param context    the UStringPrepProfile under test
 * @param fields     start/limit pointers of the fields of the line
 * @param fieldCount number of fields (not used)
 * @param pErrorCode in/out ICU error code
 */
static void U_CALLCONV
strprepProfileLineFn(void *context,
                     char *fields[][2], int32_t fieldCount,
                     UErrorCode *pErrorCode) {
    uint32_t mapping[40];
    char *end, *map;
    uint32_t code;
    int32_t length;
    UStringPrepProfile* data = (UStringPrepProfile*) context;
    const char* typeName;
    uint32_t rangeStart=0,rangeEnd =0;

    (void)fieldCount;

    typeName = fields[2][0];
    map = fields[1][0];

    if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){

        u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            return; /* nothing sensible to compare */
        }
        /* compare the range */
        compareFlagsForRange(data, rangeStart,rangeEnd,USPREP_UNASSIGNED);

    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){

        u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            return;
        }
        /* compare the range */
        compareFlagsForRange(data, rangeStart,rangeEnd,USPREP_PROHIBITED);

    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){
        /* get the character code, field 0 */
        code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
        if(end == fields[0][0]){
            /* no hexadecimal digits at all */
            *pErrorCode = U_PARSE_ERROR;
            return;
        }

        /* parse the mapping string */
        length=u_parseCodePoints(map, mapping, (int32_t)(sizeof(mapping)/sizeof(mapping[0])), pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            return;
        }

        /* compare the mapping */
        compareMapping(data, code,mapping, length,USPREP_MAP);

    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_LABEL_SEPARATOR])!=NULL){

        u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            return;
        }
        /* compare the range */
        compareFlagsForRange(data, rangeStart,rangeEnd,USPREP_LABEL_SEPARATOR);

    }else{
        *pErrorCode = U_INVALID_FORMAT_ERROR;
    }
}
/*
 * Parses the ';'-delimited profile source file `filename` (three fields per
 * line) and checks every line against `data` via strprepProfileLineFn().
 *
 * A parse failure is logged, except that U_FILE_ACCESS_ERROR is only logged
 * when reportError is set.  Nothing is done if *pErrorCode already signals
 * a failure on entry.
 */
static void
parseMappings(const char *filename, UStringPrepProfile* data, UBool reportError, UErrorCode *pErrorCode) {
    char *lineFields[3][2];

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', lineFields, 3, strprepProfileLineFn, (void*)data, pErrorCode);

    if(U_SUCCESS(*pErrorCode)) {
        return;
    }
    if(!reportError && *pErrorCode==U_FILE_ACCESS_ERROR) {
        /* a missing file is tolerated unless the caller asked for a report */
        return;
    }
    log_err( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
}
/*
 * Decodes a 16-bit trie value of a StringPrep profile.
 *
 *   result == 0                      -> USPREP_TYPE_LIMIT (no data stored)
 *   result >= _SPREP_TYPE_THRESHOLD  -> type = result - _SPREP_TYPE_THRESHOLD
 *   otherwise                        -> USPREP_MAP; bit 1 tells whether the
 *                                       remaining bits (result >> 2) are an
 *                                       index or a signed delta
 *   (result >> 2) == _SPREP_MAX_INDEX_VALUE -> USPREP_DELETE
 *
 * @param result  value read from the trie
 * @param value   receives the index or delta for USPREP_MAP, 0 for
 *                USPREP_DELETE; not written for the first two cases
 * @param isIndex receives TRUE if *value is an index, FALSE if it is a
 *                delta (and for USPREP_DELETE); not written for the first
 *                two cases
 * @return the decoded type
 */
static UStringPrepType
getValues(uint32_t result, int32_t* value, UBool* isIndex){

    UStringPrepType type;
    if(result == 0){
        /*
         * Initial value stored in the mapping table
         * just return USPREP_TYPE_LIMIT .. so that
         * the source codepoint is copied to the destination
         */
        type = USPREP_TYPE_LIMIT;
    }else if(result >= _SPREP_TYPE_THRESHOLD){
        type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD);
    }else{
        /* get the type */
        type = USPREP_MAP;
        /* ascertain if the value is index or delta */
        if(result & 0x02){
            *isIndex = TRUE;
            *value = result >> 2;

        }else{
            *isIndex = FALSE;
            /* go through int16_t so that the shift keeps the sign of the delta */
            *value = (int16_t)result;
            *value = (*value >> 2);

        }
        if((result>>2) == _SPREP_MAX_INDEX_VALUE){
            type = USPREP_DELETE;
            /* write through the pointers; assigning to them would only change the local copies */
            *isIndex = FALSE;
            *value = 0;
        }
    }
    return type;
}
/*
 * Checks that the profile maps `codepoint` to the code points in `mapping`.
 *
 * The trie value of the code point is decoded with getValues().  For an
 * index the mapping is read from data->mappingData (the length follows from
 * the index range or is stored in front of the mapping); for a delta the
 * single mapped code point is codepoint - delta; a deleted code point has a
 * mapping of length 0.  Mismatches are reported with log_err().
 *
 * @param data      profile under test
 * @param codepoint code point whose mapping is checked
 * @param mapping   expected mapping as code points
 * @param mapLength number of code points in `mapping`
 * @param type      type expected for the code point
 */
static void
compareMapping(UStringPrepProfile* data, uint32_t codepoint, uint32_t* mapping,int32_t mapLength,
               UStringPrepType type){
    uint32_t result = 0;
    int32_t length=0;
    UBool isIndex = FALSE;
    UStringPrepType retType;
    int32_t value=0, index=0, delta=0;
    int32_t* indexes = data->indexes;
    UTrie trie = data->sprepTrie;
    const uint16_t* mappingData = data->mappingData;
    int32_t realLength =0;
    int32_t j=0;

    UTRIE_GET16(&trie, codepoint, result);
    retType = getValues(result,&value,&isIndex);

    if(retType == USPREP_DELETE){
        /* a deleted code point carries neither an index nor a delta */
        isIndex = FALSE;
        value = 0;
    }

    if(type != retType && retType != USPREP_DELETE){
        log_err( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, type, retType);
    }

    if(isIndex){
        index = value;
        if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
                 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
            length = 1;
        }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
                 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
            length = 2;
        }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
                 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
            length = 3;
        }else{
            /* the length is stored in front of the mapping */
            length = mappingData[index++];
        }
    }else{
        delta = value;
        length = (retType == USPREP_DELETE)? 0 : 1;
    }

    /* figure out the real length in UTF-16 code units */
    for(j=0; j<mapLength; j++){
        if(mapping[j] > 0xFFFF){
            realLength +=2;
        }else{
            realLength++;
        }
    }

    if(realLength != length){
        log_err( "Did not get the expected length. Expected: %i Got: %i\n", realLength, length);
        /* comparing the contents would read past the stored mapping */
        return;
    }

    if(isIndex){
        /* position in mappingData; advances by 2 for supplementary code points */
        int32_t offset = index;
        for(j =0; j< mapLength; j++){
            if(mapping[j] <= 0xFFFF){
                if(mappingData[offset] != (uint16_t)mapping[j]){
                    log_err("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[j], mappingData[offset]);
                }
                offset++;
            }else{
                UChar lead  = UTF16_LEAD(mapping[j]);
                UChar trail = UTF16_TRAIL(mapping[j]);
                if(mappingData[offset] != lead ||
                    mappingData[offset+1] != trail){
                    log_err( "Did not get the expected result. Expected: 0x%04X 0x%04X  Got: 0x%04X 0x%04X\n", lead, trail, mappingData[offset], mappingData[offset+1]);
                }
                offset += 2;
            }
        }
    }else{
        if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){
            log_err("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta));
        }
    }
}
/*
 * Checks that every code point in [start, end] carries the expected type
 * in the profile's trie.
 *
 * Trie values at or above _SPREP_TYPE_THRESHOLD encode a type directly (see
 * getValues()) and must decode to `type`.  For smaller values only
 * USPREP_PROHIBITED is checked: bit 0 of the value has to be set.
 *
 * @param data  profile under test
 * @param start first code point of the range
 * @param end   last code point of the range (inclusive)
 * @param type  type expected for the whole range
 */
static void
compareFlagsForRange(UStringPrepProfile* data,
                     uint32_t start, uint32_t end,
                     UStringPrepType type){

    uint32_t result =0 ;
    UStringPrepType retType;
    UBool isIndex=FALSE;
    int32_t value=0;
    UTrie trie = data->sprepTrie;

    while(start < end+1){
        UTRIE_GET16(&trie,start, result);
        retType = getValues(result, &value, &isIndex);
        /* '>=' to agree with getValues(): the threshold itself already encodes a type */
        if(result >= _SPREP_TYPE_THRESHOLD){
            if(retType != type){
                log_err( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
            }
        }else{
            if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){
                log_err( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
            }
        }

        start++;
    }
}
/*
 * Opens the StringPrep profile binFileName from the test data package and
 * checks it against the source mapping file txtFileName by running
 * parseMappings() over that file.
 *
 * binFileName  name of the compiled profile passed to usprep_open()
 * txtFileName  name of the text file, looked up under
 *              <data out dir>/../../test/testdata/
 * options      currently unused
 * errorCode    in/out ICU error code; set on failure
 */
void
doStringPrepTest(const char* binFileName, const char* txtFileName, int32_t options, UErrorCode* errorCode){
    const char *relativepath = ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING;
    const char *testdatapath = loadTestData(errorCode);
    const char *srcdatapath = ctest_dataOutDir();
    char *filename = NULL;
    UStringPrepProfile* profile = usprep_open(testdatapath, binFileName, errorCode);

    (void)options; /* kept for interface compatibility */

    if(U_FAILURE(*errorCode)){
        log_err("Failed to load %s data file. Error: %s \n", binFileName, u_errorName(*errorCode));
        return;
    }

    /*
     * Size the buffer for all three path components plus the terminating
     * NUL; sizing it from srcdatapath alone can overflow.
     */
    filename = (char*) malloc(uprv_strlen(srcdatapath) + uprv_strlen(relativepath) + uprv_strlen(txtFileName) + 1);
    if(filename == NULL){
        *errorCode = U_MEMORY_ALLOCATION_ERROR;
        log_err("Failed to allocate the path buffer for %s. Error: %s \n", txtFileName, u_errorName(*errorCode));
        usprep_close(profile);
        return;
    }

    /* open and load the txt file */
    uprv_strcpy(filename,srcdatapath);
    uprv_strcat(filename,relativepath);
    uprv_strcat(filename,txtFileName);

    parseMappings(filename,profile, TRUE,errorCode);

    /* release everything acquired above */
    free(filename);
    usprep_close(profile);
}
#endif
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/

View file

@ -273,7 +273,9 @@ testTrieRangesWithMalloc(const char *testName,
storage = (uint8_t*) uprv_malloc(sizeof(uint8_t)*100000);
log_verbose("\ntesting Trie '%s'\n", testName);
newTrie=utrie_open(NULL, NULL, 2000, checkRanges[0].value, latin1Linear);
newTrie=utrie_open(NULL, NULL, 2000,
checkRanges[0].value, checkRanges[0].value,
latin1Linear);
/* set values from setRanges[] */
ok=TRUE;
@ -457,7 +459,9 @@ testTrieRanges(const char *testName,
UBool overwrite, ok;
log_verbose("\ntesting Trie '%s'\n", testName);
newTrie=utrie_open(NULL, NULL, 2000, checkRanges[0].value, latin1Linear);
newTrie=utrie_open(NULL, NULL, 2000,
checkRanges[0].value, checkRanges[0].value,
latin1Linear);
/* set values from setRanges[] */
ok=TRUE;

View file

@ -15,7 +15,7 @@ U_NAMESPACE_USE
ContractionTableTest::ContractionTableTest() {
status = U_ZERO_ERROR;
/*testMapping = ucmpe32_open(0, 0, 0, &status);*/
testMapping = utrie_open(NULL, NULL, 0, 0, TRUE);
testMapping = utrie_open(NULL, NULL, 0, 0, 0, TRUE);
}
ContractionTableTest::~ContractionTableTest() {

View file

@ -18,7 +18,6 @@
#if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION
#include "idnaref.h"
#include "strprep.h"
#include "punyref.h"
#include "ustr_imp.h"
#include "cmemory.h"

View file

@ -162,7 +162,7 @@ int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength,
for(;bufIndex<bufLen;){
U16_NEXT(buffer, bufIndex, bufLen, ch);
if(unassigned.contains(ch)){
status = U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR;
status = U_IDNA_UNASSIGNED_ERROR;
rsource.releaseBuffer();
return 0;
}
@ -231,7 +231,7 @@ int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength,
U16_NEXT(b1, b1Index, b1Len, ch);
if(prohibited.contains(ch) && ch!=0x0020){
status = U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR;
status = U_IDNA_PROHIBITED_ERROR;
goto CLEANUP;
}

View file

@ -22,7 +22,6 @@
#if !UCONFIG_NO_IDNA
#if !UCONFIG_NO_TRANSLITERATION
#include "strprep.h"
#include "unicode/uniset.h"
#include "unicode/ures.h"
#include "unicode/translit.h"

View file

@ -37,39 +37,22 @@
#include "umutex.h"
#include "sprpimpl.h"
#include "testidna.h"
#include "punyref.h"
UBool beVerbose=FALSE, haveCopyright=TRUE;
/* prototypes --------------------------------------------------------------- */
static UBool isDataLoaded = FALSE;
static UTrie idnTrie={ 0,0,0,0,0,0,0 };
static UDataMemory *idnData=NULL;
static UErrorCode dataErrorCode =U_ZERO_ERROR;
static const uint16_t* mappingData = NULL;
static int32_t indexes[_IDNA_INDEX_TOP]={ 0 };
static void
parseMappings(const char *filename, UBool withNorm, UBool reportError,TestIDNA& test, UErrorCode *pErrorCode);
static void
parseTable(const char *filename, UBool isUnassigned, TestIDNA& test, UErrorCode *pErrorCode);
static UBool loadIDNData(UErrorCode &errorCode);
static UBool cleanup();
parseMappings(const char *filename, UBool reportError,TestIDNA& test, UErrorCode *pErrorCode);
static void
compareMapping(uint32_t codepoint, uint32_t* mapping, int32_t mapLength,
UBool withNorm);
UStringPrepType option);
static void
compareFlagsForRange(uint32_t start, uint32_t end,
UBool isUnassigned);
compareFlagsForRange(uint32_t start, uint32_t end,UStringPrepType option);
static void
testAllCodepoints(TestIDNA& test);
@ -77,12 +60,12 @@ testAllCodepoints(TestIDNA& test);
static TestIDNA* pTestIDNA =NULL;
static const char* fileNames[] = {
"rfc3454_A_1.txt", /* contains unassigned code points */
"rfc3454_C_X.txt", /* contains code points that are prohibited */
"rfc3454_B_1.txt", /* contains case mappings when normalization is turned off */
"rfc3454_B_2.txt", /* contains case mappings when normalization it turned on */
/* "NormalizationCorrections.txt",contains NFKC case mappings whicha are not included in UTR 21 */
};
"NamePrepProfile.txt"
};
static UStringPrepProfile *profile = NULL;
static const UTrie *idnTrie = NULL;
static const int32_t *indexes = NULL;
static const uint16_t *mappingData = NULL;
/* -------------------------------------------------------------------------- */
/* file definitions */
@ -100,12 +83,16 @@ testData(TestIDNA& test) {
UErrorCode errorCode=U_ZERO_ERROR;
char *saveBasename =NULL;
loadIDNData(errorCode);
if(U_FAILURE(dataErrorCode)){
test.errln( "Could not load data. Error: %s\n",u_errorName(dataErrorCode));
return dataErrorCode;
profile = usprep_open(NULL, DATA_NAME, &errorCode);
if(U_FAILURE(errorCode)){
test.errln("Failed to load IDNA data file. " + UnicodeString(u_errorName(errorCode)));
return errorCode;
}
idnTrie = &profile->sprepTrie;
indexes = profile->indexes;
mappingData = profile->mappingData;
//initialize
pTestIDNA = &test;
@ -136,28 +123,7 @@ testData(TestIDNA& test) {
/* process unassigned */
uprv_strcpy(basename,fileNames[0]);
parseTable(filename,TRUE, test,&errorCode);
if(U_FAILURE(errorCode)) {
test.errln( "Could not open file %s for reading \n", filename);
return errorCode;
}
/* process prohibited */
uprv_strcpy(basename,fileNames[1]);
parseTable(filename,FALSE, test, &errorCode);
if(U_FAILURE(errorCode)) {
test.errln( "Could not open file %s for reading \n", filename);
return errorCode;
}
/* process mappings */
uprv_strcpy(basename,fileNames[2]);
parseMappings(filename, FALSE, FALSE,test, &errorCode);
if(U_FAILURE(errorCode)) {
test.errln( "Could not open file %s for reading \n", filename);
return errorCode;
}
uprv_strcpy(basename,fileNames[3]);
parseMappings(filename, TRUE, FALSE,test, &errorCode);
parseMappings(filename,TRUE, test,&errorCode);
if(U_FAILURE(errorCode)) {
test.errln( "Could not open file %s for reading \n", filename);
return errorCode;
@ -165,48 +131,76 @@ testData(TestIDNA& test) {
testAllCodepoints(test);
cleanup();
usprep_close(profile);
pTestIDNA = NULL;
free(filename);
return errorCode;
}
U_CDECL_BEGIN
static void U_CALLCONV
caseMapLineFn(void *context,
char *fields[][2], int32_t /*fieldCount*/,
strprepProfileLineFn(void *context,
char *fields[][2], int32_t fieldCount,
UErrorCode *pErrorCode) {
uint32_t mapping[40];
char *end, *s;
char *end, *map;
uint32_t code;
int32_t length;
UBool* mapWithNorm = (UBool*) context;
/*UBool* mapWithNorm = (UBool*) context;*/
const char* typeName;
uint32_t rangeStart=0,rangeEnd =0;
const char* filename = (const char*) context;
typeName = fields[2][0];
map = fields[1][0];
if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){
/* get the character code, field 0 */
code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
if(end<=fields[0][0] || end!=fields[0][1]) {
*pErrorCode=U_PARSE_ERROR;
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
/* store the range */
compareFlagsForRange(rangeStart,rangeEnd,USPREP_UNASSIGNED);
}else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
/* store the range */
compareFlagsForRange(rangeStart,rangeEnd,USPREP_PROHIBITED);
}else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){
/* get the character code, field 0 */
code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
/* parse the mapping string */
length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode);
/* store the mapping */
compareMapping(code,mapping, length,USPREP_MAP);
}else if(uprv_strstr(typeName, usprepTypeNames[USPREP_LABEL_SEPARATOR])!=NULL){
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
/* store the range */
compareFlagsForRange(rangeStart,rangeEnd,USPREP_LABEL_SEPARATOR);
}else{
*pErrorCode = U_INVALID_FORMAT_ERROR;
}
s = fields[1][0];
/* parse the mapping string */
length=u_parseCodePoints(s, mapping, sizeof(mapping)/4, pErrorCode);
/* store the mapping */
compareMapping(code,mapping, length, *mapWithNorm);
}
U_CDECL_END
static void
parseMappings(const char *filename,UBool withNorm, UBool reportError, TestIDNA& test, UErrorCode *pErrorCode) {
parseMappings(const char *filename,UBool reportError, TestIDNA& test, UErrorCode *pErrorCode) {
char *fields[3][2];
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return;
}
u_parseDelimitedFile(filename, ';', fields, 3, caseMapLineFn, &withNorm, pErrorCode);
u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode);
//fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len);
@ -215,142 +209,167 @@ parseMappings(const char *filename,UBool withNorm, UBool reportError, TestIDNA&
}
}
/* parser for UnicodeData.txt ----------------------------------------------- */
U_CDECL_BEGIN
static void U_CALLCONV
unicodeDataLineFn(void *context,
char *fields[][2], int32_t /*fieldCount*/,
UErrorCode *pErrorCode) {
uint32_t rangeStart=0,rangeEnd =0;
UBool* isUnassigned = (UBool*) context;
static inline UStringPrepType
getValues(uint32_t result, int32_t& value, UBool& isIndex){
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
if(U_FAILURE(*pErrorCode)){
*pErrorCode = U_PARSE_ERROR;
return;
UStringPrepType type;
if(result == 0){
/*
* Initial value stored in the mapping table
* just return USPREP_TYPE_LIMIT .. so that
* the source codepoint is copied to the destination
*/
type = USPREP_TYPE_LIMIT;
}else if(result >= _SPREP_TYPE_THRESHOLD){
type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD);
}else{
/* get the state */
type = USPREP_MAP;
/* ascertain if the value is index or delta */
if(result & 0x02){
isIndex = TRUE;
value = result >> 2; //mask off the lower 2 bits and shift
}else{
isIndex = FALSE;
value = (int16_t)result;
value = (value >> 2);
}
if((result>>2) == _SPREP_MAX_INDEX_VALUE){
type = USPREP_DELETE;
isIndex =FALSE;
value = 0;
}
}
compareFlagsForRange(rangeStart,rangeEnd,*isUnassigned);
return type;
}
U_CDECL_END
static void
parseTable(const char *filename,UBool isUnassigned,TestIDNA& test, UErrorCode *pErrorCode) {
char *fields[2][2];
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return;
}
u_parseDelimitedFile(filename, ';', fields, 1, unicodeDataLineFn, &isUnassigned, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
test.errln( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
}
}
static void
testAllCodepoints(TestIDNA& test){
if(isDataLoaded){
uint32_t i = 0;
int32_t unassigned = 0;
int32_t prohibited = 0;
int32_t mappedWithNorm = 0;
int32_t mapped = 0;
int32_t noValueInTrie = 0;
/*
{
UChar str[19] = {
0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
0x070F,//prohibited
0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74
};
uint32_t in[19] = {0};
UErrorCode status = U_ZERO_ERROR;
int32_t inLength=0, outLength=100;
char output[100] = {0};
punycode_status error;
u_strToUTF32((UChar32*)in,19,&inLength,str,19,&status);
for(i=0;i<=0x10FFFF;i++){
uint32_t result = 0;
UTRIE_GET16(&idnTrie,i, result);
if(result != UIDNA_NO_VALUE ){
if((result & 0x07) == UIDNA_UNASSIGNED){
unassigned++;
}
if((result & 0x07) == UIDNA_PROHIBITED){
prohibited++;
}
if((result>>5) == _IDNA_MAP_TO_NOTHING){
mapped++;
}
if((result & 0x07) == UIDNA_MAP_NFKC){
mappedWithNorm++;
}
}else{
noValueInTrie++;
if(result > 0){
test.errln("The return value for 0x%06X is wrong. %i\n",i,result);
}
}
}
test.logln("Number of Unassinged code points : %i \n",unassigned);
test.logln("Number of Prohibited code points : %i \n",prohibited);
test.logln("Number of Mapped code points : %i \n",mapped);
test.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm);
test.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie);
error= punycode_encode(inLength, in, NULL, (uint32_t*)&outLength, output);
printf(output);
}
}
*/
static inline void getValues(uint32_t result, int8_t& flag,
int8_t& length, int32_t& index){
/* first 3 bits contain the flag */
flag = (int8_t) (result & 0x07);
/* next 2 bits contain the length */
length = (int8_t) ((result>>3) & 0x03);
/* next 11 bits contain the index */
index = (result>> 5);
uint32_t i = 0;
int32_t unassigned = 0;
int32_t prohibited = 0;
int32_t mappedWithNorm = 0;
int32_t mapped = 0;
int32_t noValueInTrie = 0;
UStringPrepType type;
int32_t value;
UBool isIndex = FALSE;
for(i=0;i<=0x10FFFF;i++){
uint32_t result = 0;
UTRIE_GET16(idnTrie,i, result);
type = getValues(result,value, isIndex);
if(type != USPREP_TYPE_LIMIT ){
if(type == USPREP_UNASSIGNED){
unassigned++;
}
if(type == USPREP_PROHIBITED){
prohibited++;
}
if(type == USPREP_MAP){
mapped++;
}
if(type == USPREP_LABEL_SEPARATOR){
mappedWithNorm++;
}
}else{
noValueInTrie++;
if(result > 0){
test.errln("The return value for 0x%06X is wrong. %i\n",i,result);
}
}
}
test.logln("Number of Unassinged code points : %i \n",unassigned);
test.logln("Number of Prohibited code points : %i \n",prohibited);
test.logln("Number of Mapped code points : %i \n",mapped);
test.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm);
test.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie);
}
static void
compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength,
UBool withNorm){
if(isDataLoaded){
uint32_t result = 0;
UTRIE_GET16(&idnTrie,codepoint, result);
UStringPrepType type){
uint32_t result = 0;
UTRIE_GET16(idnTrie,codepoint, result);
int8_t flag, length;
int32_t index;
getValues(result,flag,length, index);
int32_t length=0;
UBool isIndex;
UStringPrepType retType;
int32_t value, index=0, delta=0;
retType = getValues(result,value,isIndex);
if(withNorm){
if(flag != UIDNA_MAP_NFKC){
pTestIDNA->errln( "Did not get the assigned flag for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, UIDNA_MAP_NFKC, flag);
}
if(type != retType && retType != USPREP_DELETE){
pTestIDNA->errln( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, USPREP_MAP, type);
}
if(isIndex){
index = value;
if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
length = 1;
}else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
length = 2;
}else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
length = 3;
}else{
if(flag==UIDNA_NO_VALUE || flag == UIDNA_PROHIBITED){
if(index != _IDNA_MAP_TO_NOTHING ){
pTestIDNA->errln( "Did not get the assigned flag for codepoint 0x%08X. Expected: %i Got: %i\n", codepoint, _IDNA_MAP_TO_NOTHING, index);
}
}
}
if(length ==_IDNA_LENGTH_IN_MAPPING_TABLE){
length = (int8_t)mappingData[index];
index++;
}
int32_t realLength =0;
/* figure out the real length */
for(int32_t j=0; j<mapLength; j++){
if(mapping[j] > 0xFFFF){
realLength +=2;
}else{
realLength++;
}
length = mappingData[index++];
}
}else{
delta = value;
length = (retType == USPREP_DELETE)? 0 : 1;
}
if(realLength != length){
pTestIDNA->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length);
}
int32_t realLength =0;
/* figure out the real length */
for(int32_t j=0; j<mapLength; j++){
if(mapping[j] > 0xFFFF){
realLength +=2;
}else{
realLength++;
}
}
if(realLength != length){
pTestIDNA->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length);
}
if(isIndex){
for(int8_t i =0; i< mapLength; i++){
if(mapping[i] <= 0xFFFF){
if(mappingData[index+i] != (uint16_t)mapping[i]){
@ -365,132 +384,58 @@ compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength,
}
}
}
}else{
if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){
pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta));
}
}
}
static void
compareFlagsForRange(uint32_t start, uint32_t end,
UBool isUnassigned){
if(isDataLoaded){
uint32_t result =0 ;
while(start < end+1){
UTRIE_GET16(&idnTrie,start, result);
if(isUnassigned){
if(result != UIDNA_UNASSIGNED){
pTestIDNA->errln( "UIDNA_UASSIGNED flag failed for 0x%06X. Expected: %04X Got: %04X\n",start,UIDNA_UNASSIGNED, result);
}
}else{
if((result & 0x03) != UIDNA_PROHIBITED){
pTestIDNA->errln( "UIDNA_PROHIBITED flag failed for 0x%06X. Expected: %04X Got: %04X\n\n",start,UIDNA_PROHIBITED, result);
}
UStringPrepType type){
uint32_t result =0 ;
UStringPrepType retType;
UBool isIndex=FALSE;
int32_t value=0;
/*
// supplementary code point
UChar __lead16=UTF16_LEAD(0x2323E);
int32_t __offset;
// get data for lead surrogate
(result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16));
__offset=(&idnTrie)->getFoldingOffset(result);
// get the real data from the folded lead/trail units
if(__offset>0) {
(result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff);
} else {
(result)=(uint32_t)((&idnTrie)->initialValue);
}
UTRIE_GET16(&idnTrie,0x2323E, result);
*/
while(start < end+1){
UTRIE_GET16(idnTrie,start, result);
retType = getValues(result,value,isIndex);
if(result > _SPREP_TYPE_THRESHOLD){
if(retType != type){
pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
}
}else{
if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){
pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
}
start++;
}
start++;
}
}
UBool
cleanup() {
if(idnData!=NULL) {
udata_close(idnData);
idnData=NULL;
}
dataErrorCode=U_ZERO_ERROR;
isDataLoaded=FALSE;
return TRUE;
}
U_CDECL_BEGIN
static UBool U_CALLCONV
isAcceptable(void * /* context */,
const char * /* type */, const char * /* name */,
const UDataInfo *pInfo) {
if(
pInfo->size>=20 &&
pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
pInfo->charsetFamily==U_CHARSET_FAMILY &&
pInfo->dataFormat[0]==0x49 && /* dataFormat="IDNA" 0x49, 0x44, 0x4e, 0x41 */
pInfo->dataFormat[1]==0x44 &&
pInfo->dataFormat[2]==0x4e &&
pInfo->dataFormat[3]==0x41 &&
pInfo->formatVersion[0]==2 &&
pInfo->formatVersion[2]==UTRIE_SHIFT &&
pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
) {
return TRUE;
} else {
return FALSE;
}
}
/* idnTrie: the folding offset is the lead FCD value itself */
static int32_t U_CALLCONV
getFoldingOffset(uint32_t data) {
if(data&0x8000) {
return (int32_t)(data&0x7fff);
} else {
return 0;
}
}
U_CDECL_END
static UBool
loadIDNData(UErrorCode &errorCode) {
/* load Unicode normalization data from file */
if(isDataLoaded==FALSE) {
UTrie _idnTrie={ 0,0,0,0,0,0,0 };
UDataMemory *data;
const int32_t *p=NULL;
const uint8_t *pb;
if(&errorCode==NULL || U_FAILURE(errorCode)) {
return 0;
}
/* open the data outside the mutex block */
data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errorCode);
dataErrorCode=errorCode;
if(U_FAILURE(errorCode)) {
return isDataLoaded=FALSE;
}
p=(const int32_t *)udata_getMemory(data);
pb=(const uint8_t *)(p+_IDNA_INDEX_TOP);
utrie_unserialize(&_idnTrie, pb, p[_IDNA_INDEX_TRIE_SIZE], &errorCode);
_idnTrie.getFoldingOffset=getFoldingOffset;
if(U_FAILURE(errorCode)) {
dataErrorCode=errorCode;
udata_close(data);
return isDataLoaded=FALSE;
}
/* in the mutex block, set the data for this process */
umtx_lock(NULL);
if(idnData==NULL) {
idnData=data;
data=NULL;
uprv_memcpy(&indexes, p, sizeof(indexes));
uprv_memcpy(&idnTrie, &_idnTrie, sizeof(UTrie));
} else {
p=(const int32_t *)udata_getMemory(idnData);
}
umtx_unlock(NULL);
/* initialize some variables */
mappingData=(uint16_t *)((uint8_t *)(p+_IDNA_INDEX_TOP)+indexes[_IDNA_INDEX_TRIE_SIZE]);
isDataLoaded = TRUE;
/* if a different thread set it first, then close the extra data */
if(data!=NULL) {
udata_close(data); /* NULL if it was set correctly */
}
}
return isDataLoaded;
}
#endif /* #if !UCONFIG_NO_IDNA */

View file

@ -221,7 +221,7 @@ static const char *domainNames[] = {
"www.\\u0021.com",
"www.\\u0024.com",
"\\u003f",
// These yeild U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
// These yeild U_IDNA_PROHIBITED_ERROR
//"\\u00CF\\u0082.com",
//"\\u00CE\\u00B2\\u00C3\\u009Fss.com",
//"\\u00E2\\u0098\\u00BA.com",
@ -245,13 +245,13 @@ static struct ErrorCases{
{
0x0077, 0x0077, 0x0077, 0x002e, /* www. */
0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
0x2060,/*prohibited*/
0x070F,/*prohibited*/
0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
0x002e, 0x0063, 0x006f, 0x006d, /* com. */
0x0000
},
"www.XN--fxG2146CsoA28OruCyA378BqrE2tCwOp06C5qBw82A1rFfmAE0361DeA96B.com",
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR,
"www.XN--8mb5595fsoa28orucya378bqre2tcwop06c5qbw82a1rffmae0361dea96b.com",
U_IDNA_PROHIBITED_ERROR,
FALSE, TRUE, TRUE
},
@ -265,7 +265,7 @@ static struct ErrorCases{
},
"www.XN--6lA2Bz548Fj1GuA391Bf1Gb1N59Ab29A7iA.com",
U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR,
U_IDNA_UNASSIGNED_ERROR,
FALSE, TRUE, TRUE
},
{
@ -349,7 +349,7 @@ static struct ErrorCases{
0x0000
},
"www.XN--ghbgi278xia.com",
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR,
U_IDNA_PROHIBITED_ERROR,
FALSE, TRUE, TRUE
},
{
@ -423,78 +423,78 @@ static struct ConformanceTestCases
"Non-ASCII multibyte space character U+1680",
"\xE1\x9A\x80", NULL,
"Nameprep", UIDNA_DEFAULT,
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
U_IDNA_PROHIBITED_ERROR
},
{
"Non-ASCII 8bit control character U+0085",
"\xC2\x85", NULL,
"Nameprep", UIDNA_DEFAULT,
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
U_IDNA_PROHIBITED_ERROR
},
{
"Non-ASCII multibyte control character U+180E",
"\xE1\xA0\x8E", NULL,
"Nameprep", UIDNA_DEFAULT,
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
U_IDNA_PROHIBITED_ERROR
},
{
"Non-ASCII control character U+1D175",
"\xF0\x9D\x85\xB5", NULL,
"Nameprep", UIDNA_DEFAULT,
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
U_IDNA_PROHIBITED_ERROR
},
{
"Plane 0 private use character U+F123",
"\xEF\x84\xA3", NULL,
"Nameprep", UIDNA_DEFAULT,
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
U_IDNA_PROHIBITED_ERROR
},
{
"Plane 15 private use character U+F1234",
"\xF3\xB1\x88\xB4", NULL,
"Nameprep", UIDNA_DEFAULT,
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
U_IDNA_PROHIBITED_ERROR
},
{
"Plane 16 private use character U+10F234",
"\xF4\x8F\x88\xB4", NULL,
"Nameprep", UIDNA_DEFAULT,
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
U_IDNA_PROHIBITED_ERROR
},
{
"Non-character code point U+8FFFE",
"\xF2\x8F\xBF\xBE", NULL,
"Nameprep", UIDNA_DEFAULT,
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
U_IDNA_PROHIBITED_ERROR
},
{
"Non-character code point U+10FFFF",
"\xF4\x8F\xBF\xBF", NULL,
"Nameprep", UIDNA_DEFAULT,
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
U_IDNA_PROHIBITED_ERROR
},
/*
{
"Surrogate code U+DF42",
"\xED\xBD\x82", NULL, "Nameprep", UIDNA_DEFAULT,
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
U_IDNA_PROHIBITED_ERROR
},
*/
{
"Non-plain text character U+FFFD",
"\xEF\xBF\xBD", NULL,
"Nameprep", UIDNA_DEFAULT,
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
U_IDNA_PROHIBITED_ERROR
},
{
"Ideographic description character U+2FF5",
"\xE2\xBF\xB5", NULL,
"Nameprep", UIDNA_DEFAULT,
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
U_IDNA_PROHIBITED_ERROR
},
{
"Display property character U+0341",
"\xCD\x81", "\xCD\x81",
"\xCD\x81", "\xCC\x81",
"Nameprep", UIDNA_DEFAULT, U_ZERO_ERROR
},
@ -503,26 +503,26 @@ static struct ConformanceTestCases
"Left-to-right mark U+200E",
"\xE2\x80\x8E", "\xCC\x81",
"Nameprep", UIDNA_DEFAULT,
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
U_IDNA_PROHIBITED_ERROR
},
{
"Deprecated U+202A",
"\xE2\x80\xAA", "\xCC\x81",
"Nameprep", UIDNA_DEFAULT,
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
U_IDNA_PROHIBITED_ERROR
},
{
"Language tagging character U+E0001",
"\xF3\xA0\x80\x81", "\xCC\x81",
"Nameprep", UIDNA_DEFAULT,
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
U_IDNA_PROHIBITED_ERROR
},
{
"Language tagging character U+E0042",
"\xF3\xA0\x81\x82", NULL,
"Nameprep", UIDNA_DEFAULT,
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
U_IDNA_PROHIBITED_ERROR
},
{
"Bidi: RandALCat character U+05BE and LCat characters",
@ -557,7 +557,7 @@ static struct ConformanceTestCases
"Unassigned code point U+E0002",
"\xF3\xA0\x80\x82", NULL,
"Nameprep", UIDNA_DEFAULT,
U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR
U_IDNA_UNASSIGNED_ERROR
},
/* // Invalid UTF-8
@ -585,7 +585,39 @@ static struct ConformanceTestCases
#define MAX_DEST_SIZE 300
void TestIDNA::debug(const UChar* src, int32_t srcLength, int32_t options){
UParseError parseError;
UErrorCode transStatus = U_ZERO_ERROR;
UErrorCode prepStatus = U_ZERO_ERROR;
NamePrepTransform* trans = NamePrepTransform::createInstance(parseError,transStatus);
int32_t prepOptions = (((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0);
UStringPrepProfile* prep = usprep_open(NULL,"uidna",&prepStatus);
UChar *transOut=NULL, *prepOut=NULL;
int32_t transOutLength=0, prepOutLength=0;
transOutLength = trans->process(src,srcLength,transOut, 0, prepOptions>0, &parseError, transStatus);
if( transStatus == U_BUFFER_OVERFLOW_ERROR){
transStatus = U_ZERO_ERROR;
transOut = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * transOutLength);
transOutLength = trans->process(src,srcLength,transOut, transOutLength, prepOptions>0, &parseError, transStatus);
}
prepOutLength = usprep_prepare(prep, src, srcLength, prepOut, 0, prepOptions, &parseError, &prepStatus);
if( prepStatus == U_BUFFER_OVERFLOW_ERROR){
prepStatus = U_ZERO_ERROR;
prepOut = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * prepOutLength);
prepOutLength = usprep_prepare(prep, src, srcLength, prepOut, prepOutLength, prepOptions, &parseError, &prepStatus);
}
if(UnicodeString(transOut,transOutLength)!= UnicodeString(prepOut, prepOutLength)){
errln("Failed. Expected: " + prettify(UnicodeString(transOut, transOutLength))
+ " Got: " + prettify(UnicodeString(prepOut,prepOutLength)));
}
}
void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* testName,
UBool useSTD3ASCIIRules,UErrorCode expectedStatus,
@ -609,7 +641,7 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test
// test null-terminated source and return value of number of UChars required
if( expectedStatus != U_IDNA_STD3_ASCII_RULES_ERROR ){
destLen = func(src,-1,dest,0,options, &parseError , &status);
destLen = func(src,-1,NULL,0,options, &parseError , &status);
if(status == U_BUFFER_OVERFLOW_ERROR){
status = U_ZERO_ERROR; // reset error code
if(destLen+1 < MAX_DEST_SIZE){
@ -634,7 +666,7 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test
}
if(testUnassigned ){
status = U_ZERO_ERROR;
destLen = func(src,-1,dest,0,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status);
destLen = func(src,-1,NULL,0,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status);
if(status == U_BUFFER_OVERFLOW_ERROR){
status = U_ZERO_ERROR; // reset error code
if(destLen+1 < MAX_DEST_SIZE){
@ -643,7 +675,12 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test
// TODO : compare output with expected
if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){
//errln("Did not get the expected result for %s null terminated source with both options set.\n",testName);
errln("Did not get the expected result for "+UnicodeString(testName) +" null terminated source with both options set. Expected: "+ prettify(UnicodeString(expected,expectedLen)));
errln("Did not get the expected result for "+UnicodeString(testName) +
" null terminated source "+ prettify(src) +
" with both options set. Expected: "+ prettify(UnicodeString(expected,expectedLen))+
"Got: " + prettify(UnicodeString(dest,destLen)));
debug(src,-1,options | UIDNA_ALLOW_UNASSIGNED);
}
}else{
@ -651,7 +688,7 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test
}
}
//testing query string
if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR){
if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_ERROR){
errln( "Did not get the expected error for %s null terminated source with options set. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status));
}
}
@ -659,7 +696,7 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test
status = U_ZERO_ERROR;
// test source with lengthand return value of number of UChars required
destLen = func(tSrc, tSrcLen, dest,0,options, &parseError, &status);
destLen = func(tSrc, tSrcLen, NULL,0,options, &parseError, &status);
if(status == U_BUFFER_OVERFLOW_ERROR){
status = U_ZERO_ERROR; // reset error code
if(destLen+1 < MAX_DEST_SIZE){
@ -680,7 +717,7 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test
if(testUnassigned){
status = U_ZERO_ERROR;
destLen = func(tSrc,tSrcLen,dest,0,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status);
destLen = func(tSrc,tSrcLen,NULL,0,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status);
if(status == U_BUFFER_OVERFLOW_ERROR){
status = U_ZERO_ERROR; // reset error code
@ -696,14 +733,14 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test
}
}
//testing query string
if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR){
if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_ERROR){
errln( "Did not get the expected error for %s with source length and options set. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status));
}
}
}else{
status = U_ZERO_ERROR;
destLen = func(src,-1,dest,0,options | UIDNA_USE_STD3_RULES, &parseError, &status);
destLen = func(src,-1,NULL,0,options | UIDNA_USE_STD3_RULES, &parseError, &status);
if(status == U_BUFFER_OVERFLOW_ERROR){
status = U_ZERO_ERROR; // reset error code
if(destLen+1 < MAX_DEST_SIZE){
@ -726,7 +763,7 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test
status = U_ZERO_ERROR;
destLen = func(tSrc,tSrcLen,dest,0,options | UIDNA_USE_STD3_RULES, &parseError, &status);
destLen = func(tSrc,tSrcLen,NULL,0,options | UIDNA_USE_STD3_RULES, &parseError, &status);
if(status == U_BUFFER_OVERFLOW_ERROR){
status = U_ZERO_ERROR; // reset error code
@ -742,7 +779,7 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test
}
}
//testing query string
if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR){
if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_ERROR){
errln( "Did not get the expected error for %s with source length and options set. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status));
}
}
@ -1078,13 +1115,13 @@ void TestIDNA::testConformance(const char* toASCIIName, TestFunc toASCII,
IDNToASCIIName, FALSE,
conformanceTestCases[i].expectedStatus,
TRUE,
(conformanceTestCases[i].expectedStatus != U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR),
(conformanceTestCases[i].expectedStatus != U_IDNA_UNASSIGNED_ERROR),
IDNToASCII);
testAPI(src,expected,
toASCIIName, FALSE,
conformanceTestCases[i].expectedStatus, TRUE,
(conformanceTestCases[i].expectedStatus != U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR),
(conformanceTestCases[i].expectedStatus != U_IDNA_UNASSIGNED_ERROR),
toASCII);
}
@ -1474,11 +1511,15 @@ void TestIDNA::testCompareReferenceImpl(const UChar* src, int32_t srcLen){
asciiLen = idnaref_toASCII(labelUChars, label.length()-1,ascii,asciiCapacity,
UIDNA_DEFAULT,&parseError,&expectedStatus);
if(expectedStatus == U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR){
if(expectedStatus == U_IDNA_UNASSIGNED_ERROR){
expectedStatus = U_ZERO_ERROR;
asciiLen = idnaref_toASCII(labelUChars, label.length()-1,ascii,asciiCapacity,
UIDNA_ALLOW_UNASSIGNED,&parseError,&expectedStatus);
expectedStatus = U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR;
if(expectedStatus==U_BUFFER_OVERFLOW_ERROR){
errln("idnaref_toASCII failed. Error:" + UnicodeString(u_errorName(expectedStatus)));
return;
}
expectedStatus = U_IDNA_UNASSIGNED_ERROR;
}
testAPI(labelUChars,ascii, "uidna_toASCII",FALSE,
@ -1488,11 +1529,15 @@ void TestIDNA::testCompareReferenceImpl(const UChar* src, int32_t srcLen){
expectedStatus = U_ZERO_ERROR;
uniLen = idnaref_toUnicode(ascii, asciiLen, uni,uniCapacity,UIDNA_DEFAULT,
&parseError,&expectedStatus);
if(expectedStatus == U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR){
if(expectedStatus == U_IDNA_UNASSIGNED_ERROR){
expectedStatus = U_ZERO_ERROR;
uniLen = idnaref_toUnicode(ascii, asciiLen, uni,uniCapacity,UIDNA_DEFAULT,
&parseError,&expectedStatus);
expectedStatus = U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR;
if(expectedStatus==U_BUFFER_OVERFLOW_ERROR){
errln("idnaref_toASCII failed. Error:" + UnicodeString(u_errorName(expectedStatus)));
return;
}
expectedStatus = U_IDNA_UNASSIGNED_ERROR;
}
testAPI(ascii,uni,"uidna_toUnicode",FALSE,expectedStatus,TRUE, FALSE, uidna_toUnicode);
}
@ -1504,7 +1549,7 @@ void TestIDNA::TestIDNAMonkeyTest(){
UErrorCode status = U_ZERO_ERROR;
getInstance(status); // Init prep
/*
for(int i=0; i<loopCount; i++){
source.truncate(0);
getTestSource(source);
@ -1512,14 +1557,20 @@ void TestIDNA::TestIDNAMonkeyTest(){
testCompareReferenceImpl(source.getBuffer(),source.length()-1);
source.releaseBuffer();
}
/* for debugging
source.append("\\U000E5BC8\\U00025112\\U00016846\\U0001B375\\U0002EDE4"
"\\U00016E18\\U00010B84\\U000E1639\\U0001C3BE\\u336B\\u5F66"
"\\u2AA6\\uD817\\u0000");
source = source.unescape();
testCompareReferenceImpl(source.getBuffer(),source.length()-1);
source.releaseBuffer();
*/
/* for debugging */
source.append( "\\u2109\\u3E1B\\U000E65CA\\U0001CAC5" );
source = source.unescape();
//testCompareReferenceImpl(source.getBuffer(),source.length());
debug(source.getBuffer(),source.length(),UIDNA_ALLOW_UNASSIGNED);
source.releaseBuffer();
source.truncate(0);
source.append("\\uCF18\\U00021161\\U000EEF11\\U0002BB82\\U0001D63C");
debug(source.getBuffer(),source.length(),UIDNA_ALLOW_UNASSIGNED);
source.releaseBuffer();
delete TestIDNA::prep;
TestIDNA::prep = NULL;

View file

@ -74,7 +74,7 @@ private:
void testCompare(const char* testName, CompareFunc func);
void testChaining(const char* toASCIIName, TestFunc toASCII,
const char* toUnicodeName, TestFunc toUnicode);
void debug(const UChar* src, int32_t srcLength, int32_t options);
// main testing functions
void testAPI(const UChar *src, const UChar *expected, const char *testName,
UBool useSTD3ASCIIRules, UErrorCode expectedStatus,

View file

@ -0,0 +1,8 @@
*.d
*.pdb
Debug
Makefile
Release
gensprep
gensprep.8
gensprep.plg

View file

@ -0,0 +1,102 @@
## Makefile.in for ICU - tools/gensprep
## Copyright (c) 2001-2003, International Business Machines Corporation and
## others. All Rights Reserved.
## Steven R. Loomis/Markus W. Scherer
## Source directory information
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = ../..
include $(top_builddir)/icudefs.mk
##
## Manual page: section number and the file name derived from the target name
SECTION = 8
MAN_FILES = $(TARGET:$(EXEEXT)=).$(SECTION)
## Build directory information
subdir = tools/gensprep
ICUDATADIR=$(top_builddir)/data
UNICODEDATADIR=$(top_srcdir)/../data/unidata
## Extra files to remove for 'make clean'
## NOTE(review): RES_FILES and TEST_FILES are not set in this file - confirm whether icudefs.mk provides them
CLEANFILES = *~ $(DEPS) $(RES_FILES) $(TEST_FILES) $(MAN_FILES)
## Target information
TARGET = gensprep$(EXEEXT)
CPPFLAGS += -I$(top_builddir)/common -I$(top_srcdir)/common -I$(srcdir)/../toolutil
LIBS = $(LIBICUTOOLUTIL) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
OBJECTS = gensprep.o store.o
## One dependency file per object file
DEPS = $(OBJECTS:.o=.d)
## List of phony targets
## NOTE(review): build-data is declared phony and required by all-local, but no rule for it appears in this file
.PHONY : all all-local install install-local clean clean-local \
distclean distclean-local dist dist-local check \
check-local build-data install-man
## Clear suffix list
.SUFFIXES :
## List of standard targets
all: all-local
install: install-local
clean: clean-local
distclean : distclean-local
dist: dist-local
check: all check-local
## Building everything means the tool itself plus its manual page
all-local: $(TARGET) build-data $(MAN_FILES)
## The tool is installed into $(sbindir)
install-local: all-local install-man
$(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
$(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)/$(TARGET)
# man page
install-man: $(MAN_FILES)
$(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
$(INSTALL_DATA) $< $(DESTDIR)$(mandir)/man$(SECTION)
## The manual page is generated from its .in template by re-running config.status
%.$(SECTION): $(srcdir)/%.$(SECTION).in
cd $(top_builddir) \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
# build postscript and pdf formats
#$(TARGET).ps: $(TARGET).$(SECTION)
# groff -man < $< > $@
#$(TARGET).pdf: $(TARGET).ps
# ps2pdf $< $@
dist-local:
clean-local:
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
$(RMV) $(TARGET) $(OBJECTS)
distclean-local: clean-local
$(RMV) Makefile
## Nothing tool-specific is run for 'check'; it only requires the build to be complete
check-local: all-local
## Regenerate this Makefile when its template or the configuration changes
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
cd $(top_builddir) \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
$(TARGET) : $(OBJECTS)
$(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
## Pull in the dependency files, except when only a *clean goal was requested
ifeq (,$(MAKECMDGOALS))
-include $(DEPS)
else
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
-include $(DEPS)
endif
endif

View file

@ -0,0 +1,271 @@
#!/usr/bin/perl
# Copyright (c) 2001-2003 International Business Machines
# Corporation and others. All Rights Reserved.
####################################################################################
# filterRFC3454.pl:
# This tool filters the RFC-3454 txt file for StringPrep tables and creates a table
# to be used in NamePrepProfile
#
# Author: Ram Viswanadha
#
####################################################################################
use File::Find;
use File::Basename;
use IO::File;
use Cwd;
use File::Copy;
use Getopt::Long;
use File::Path;
use File::Copy;
$copyright = "###################\n# Copyright (C) 2003, International Business Machines\n# Corporation and others. All Rights Reserved.\n###################\n\n";
$warning = "###################\n# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT \n###################\n\n";
#run the program
main();
#---------------------------------------------------------------------
# The main program
# Entry point of the script.
# Parses the command line, starts a fresh destination file containing the
# copyright and warning headers, then scans the RFC text for "Start Table"
# marker lines and hands each selected table to the matching create* helper.
# The label-separator table is appended last when --ldh-chars is given.
sub main(){
GetOptions(
"--sourcedir=s" => \$sourceDir,
"--destdir=s" => \$destDir,
"--src-filename=s" => \$srcFileName,
"--dest-filename=s" => \$destFileName,
"--A1" => \$a1,
"--B1" => \$b1,
"--B2" => \$b2,
"--C11" => \$c11,
"--C12" => \$c12,
"--C21" => \$c21,
"--C22" => \$c22,
"--C3" => \$c3,
"--C4" => \$c4,
"--C5" => \$c5,
"--C6" => \$c6,
"--C7" => \$c7,
"--C8" => \$c8,
"--C9" => \$c9,
"--ldh-chars" => \$writeLDHChars,
);
# All four path options are required; usage() prints the help text and exits.
usage() unless defined $sourceDir;
usage() unless defined $destDir;
usage() unless defined $srcFileName;
usage() unless defined $destFileName;
$infile = $sourceDir."/".$srcFileName;
$inFH = IO::File->new($infile,"r")
or die "could not open the file $infile for reading: $! \n";
$outfile = $destDir."/".$destFileName;
# Remove any previous output first: this file and every helper open it in append mode.
unlink($outfile);
$outFH = IO::File->new($outfile,"a")
or die "could not open the file $outfile for writing: $! \n";
print $outFH $copyright;
print $outFH $warning;
# The helpers reopen $outfile themselves, so the handle is closed once the headers are written.
close($outFH);
# Only lines containing "Start Table" are examined here; the helper that is
# called consumes the input up to the corresponding "End Table" line.
# NOTE(review): the dots in the table patterns below are unescaped, so each
# one matches any character in that position.
while(defined ($line=<$inFH>)){
next unless $line=~ /Start\sTable/;
# NOTE(review): $a1 is filled in by GetOptions but never consulted, so table
# A.1 is written whether or not --A1 was given.
if($line =~ /A.1/){
createUnassignedTable($inFH,$outfile);
}
if($line =~ /B.1/ && defined $b1){
createCaseMapNoNorm($inFH,$outfile);
}
if($line =~ /B.2/ && defined $b2){
createCaseMap($inFH,$outfile);
}
# All C.* tables share one helper; the marker line is passed along because
# the helper builds its comment header from it.
if($line =~ /C.1.1/ && defined $c11 ){
createProhibitedTable($inFH,$outfile,$line);
}
if($line =~ /C.1.2/ && defined $c12 ){
createProhibitedTable($inFH,$outfile,$line);
}
if($line =~ /C.2.1/ && defined $c21 ){
createProhibitedTable($inFH,$outfile,$line);
}
if($line =~ /C.2.2/ && defined $c22 ){
createProhibitedTable($inFH,$outfile,$line);
}
if($line =~ /C.3/ && defined $c3 ){
createProhibitedTable($inFH,$outfile,$line);
}
if($line =~ /C.4/ && defined $c4 ){
createProhibitedTable($inFH,$outfile,$line);
}
if($line =~ /C.5/ && defined $c5 ){
createProhibitedTable($inFH,$outfile,$line);
}
if($line =~ /C.6/ && defined $c6 ){
createProhibitedTable($inFH,$outfile,$line);
}
if($line =~ /C.7/ && defined $c7 ){
createProhibitedTable($inFH,$outfile,$line);
}
if($line =~ /C.8/ && defined $c8 ){
createProhibitedTable($inFH,$outfile,$line);
}
if($line =~ /C.9/ && defined $c9 ){
createProhibitedTable($inFH,$outfile,$line);
}
}
# The label separators are written from a fixed list, not read from the RFC text.
if( defined $writeLDHChars){
createLDHCharTable($inFH, $outfile);
}
close($inFH);
}
#-----------------------------------------------------------------------
# Copies the body of one RFC 3454 table from $inFH to $outFH.
# Arguments: input handle, output handle, a comment printed before the
# entries, and a table selector ("A", "B.1", "B.2" or "C") that picks the
# rewrite applied to each entry.
# Reading stops at the "End Table" line, where the number of code points
# counted is written as a trailing comment. If the input ends before that
# line is seen, the sub returns without writing the total.
sub readPrint{
local ($inFH, $outFH,$comment, $table) = @_;
# $count is not declared with my/local, so it is a package global that is reset on every call.
$count = 0;
print $outFH $comment."\n";
while(defined ($line = <$inFH>)){
next if $line =~ /Hoffman\s\&\sBlanchet/; # ignore heading
next if $line =~ /RFC\s3454/; # ignore heading
next if $line =~ /\f/; # ignore form feed
next if $line eq "\n"; # ignore blank lines
# break if "End Table" is found
if( $line =~ /End\sTable/){
print $outFH "\n# Total code points $count\n\n";
return;
}
# NOTE(review): $print is not assigned anywhere in the visible script, so this
# echo to STDOUT looks like a debugging leftover - confirm before relying on it.
if($print==1){
print $line;
}
# Turn the first hyphen into ".." and drop leading whitespace.
$line =~ s/-/../;
$line =~ s/^\s+//;
# Entries without any ';' get one appended at the end of the line.
if($line =~ /\;/){
}else{
$line =~ s/$/;/;
}
# Rewrite the entry according to the table it came from: tables A and C keep
# only the first field and get a fixed type name, tables B keep the mapping
# and have their descriptive text replaced by MAP.
if($table =~ /A/ ){
($code, $noise) = split /;/ , $line;
$line = $code."; ; UNASSIGNED\n";
}elsif ( $table =~ /B\.1/ ){
$line =~ s/Map to nothing/MAP/;
}elsif ( $table =~ /B\.2/ ){
$line =~ s/Case map/MAP/;
$line =~ s/Additional folding/MAP/;
}elsif ( $table =~ /C/ ) {
($code, $noise) = split /;/ , $line;
$line = $code."; ; PROHIBITED\n";
}
# Count the code points: a "start..end" entry contributes one per value from
# start to end inclusive (both converted with atoi), any other entry counts as one.
if($line =~ /\.\./){
($code, $noise) = split /;/ , $line;
($startStr, $endStr ) = split /\.\./, $code;
$start = atoi($startStr);
$end = atoi($endStr);
#print $start." ".$end."\n";
while($start <= $end){
$count++;
$start++;
}
}else{
$count++;
}
print $outFH $line;
}
}
#-----------------------------------------------------------------------
# Converts a string of hexadecimal digits (for example "024F") into its
# numeric value.
# Characters that are not hex digits, such as surrounding blanks or a trailing
# newline, are ignored. An empty or undefined argument yields 0.
# The previous digit-by-digit loop added each character as a plain number, so
# the letters A-F counted as 0 and every range end point containing a letter
# was converted wrongly.
sub atoi {
    my ($digits) = @_;
    $digits = "" unless defined $digits;
    # keep only what hex() understands, so it never warns about illegal digits
    $digits =~ s/[^0-9A-Fa-f]//g;
    return 0 if $digits eq "";
    return hex($digits);
}
#-----------------------------------------------------------------------
# Appends Table A.1 to the destination file.
# Arguments: the RFC input handle, positioned just after the table's start
# marker, and the path of the destination file (opened in append mode).
sub createUnassignedTable{
    my ($input, $target) = @_;
    my $sink = IO::File->new($target, "a");
    die "could not open the file $target for writing: $! \n" unless $sink;
    my $header = "# This table contains code points from Table A.1 from RFC 3454\n";
    # "A" makes readPrint tag every entry as UNASSIGNED
    readPrint($input, $sink, $header, "A");
    close($sink);
}
#-----------------------------------------------------------------------
# Appends Table B.1 to the destination file.
# Arguments: the RFC input handle, positioned just after the table's start
# marker, and the path of the destination file (opened in append mode).
sub createCaseMapNoNorm{
    my ($input, $target) = @_;
    my $sink = IO::File->new($target, "a");
    die "could not open the file $target for writing: $! \n" unless $sink;
    my $header = "# This table contains code points from Table B.1 from RFC 3454\n";
    # "B.1" makes readPrint replace "Map to nothing" with MAP
    readPrint($input, $sink, $header, "B.1");
    close($sink);
}
#-----------------------------------------------------------------------
# Appends Table B.2 to the destination file.
# Unlike the other table helpers, the comment header written here is preceded
# by the global "generated file" warning text.
# Arguments: the RFC input handle, positioned just after the table's start
# marker, and the path of the destination file (opened in append mode).
sub createCaseMap{
    my ($input, $target) = @_;
    my $sink = IO::File->new($target, "a");
    die "could not open the file $target for writing: $! \n" unless $sink;
    my $header = $warning."# This table contains code points from Table B.2 from RFC 3454\n";
    # "B.2" makes readPrint replace the mapping descriptions with MAP
    readPrint($input, $sink, $header, "B.2");
    close($sink);
}
#-----------------------------------------------------------------------
# Appends one of the C.* tables to the destination file.
# Arguments: the RFC input handle, positioned just after the table's start
# marker, the path of the destination file (opened in append mode), and the
# start-marker line itself, from which the comment header is derived.
sub createProhibitedTable{
    my ($input, $target, $marker) = @_;
    # Strip the word "Start" and every hyphen from the marker line; what is
    # left names the table in the comment header.
    $marker =~ s/Start//;
    $marker =~ s/-//g;
    my $header = "# code points from $marker";
    my $sink = IO::File->new($target, "a");
    die "could not open the file $target for writing: $! \n" unless $sink;
    # "C" makes readPrint tag every entry as PROHIBITED
    readPrint($input, $sink, $header, "C");
    close($sink);
}
#-----------------------------------------------------------------------
# Appends the fixed list of label separator code points to the destination
# file. Nothing is read from the input handle; the entries are hard-coded.
# Arguments: the RFC input handle (unused) and the path of the destination
# file (opened in append mode).
sub createLDHCharTable{
    my ($input, $target) = @_;
    my @rows = (
        "002E; ; LABEL_SEPARATOR\n",
        "3002; ; LABEL_SEPARATOR\n",
        "FF0E; ; LABEL_SEPARATOR\n",
        "FF61; ; LABEL_SEPARATOR\n",
    );
    my $sink = IO::File->new($target, "a");
    die "could not open the file $target for writing: $! \n" unless $sink;
    print $sink "# code points for LDH chars \n";
    foreach my $row (@rows) {
        print $sink $row;
    }
    print $sink "\n# Total code points 4\n";
    close($sink);
}
#-----------------------------------------------------------------------
# Prints the command line help to STDOUT and terminates the script.
# NOTE(review): the exit status is 0 even when this is reached because a
# required option is missing - confirm whether callers need a failure status.
sub usage {
print << "END";
Usage:
filterRFC3454.pl
Options:
--sourcedir=<directory>
--destdir=<directory>
--src-filename=<name of RFC file>
--dest-filename=<name of destination file>
--A1 Generate data for table A1
--B1 Generate data for table B1
--B2 Generate data for table B2
--C11 Generate data for table C11
--C12 Generate data for table C12
--C21 Generate data for table C21
--C22 Generate data for table C22
--C3 Generate data for table C3
--C4 Generate data for table C4
--C5 Generate data for table C5
--C6 Generate data for table C6
--C7 Generate data for table C7
--C8 Generate data for table C8
--C9 Generate data for table C9
--ldh-chars Generate data for LDH chars used in IDNA
e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --src-filename=rfc3454.txt --dest-filename=NamePrepProfile.txt --A1 --B2 --C12 --C21 --C22 --C3 --C4 --C5 --C6 --C7 --C8 --C9 --ldh-chars
filterRFC3454.pl filters the RFC file and creates String prep table files.
The RFC text can be downloaded from ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt
END
exit(0);
}

View file

@ -0,0 +1,102 @@
.\" Hey, Emacs! This is -*-nroff-*- you know...
.\"
.\" gensprep.8: manual page for the gensprep utility
.\"
.\" Copyright (C) 2003 IBM, Inc. and others.
.\"
.TH gensprep 8 "18 March 2003" "ICU MANPAGE" "ICU @VERSION@ Manual"
.SH NAME
.B gensprep
\- compile StringPrep data from files filtered by filterRFC3454.pl
.SH SYNOPSIS
.B gensprep
[
.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
]
[
.BR "\-v\fP, \fB\-\-verbose"
]
[
.BI "\-c\fP, \fB\-\-copyright"
]
[
.BI "\-s\fP, \fB\-\-sourcedir" " source"
]
[
.BI "\-d\fP, \fB\-\-destdir" " destination"
]
.SH DESCRIPTION
.B gensprep
reads filtered RFC 3454 files and compiles their
information into a binary form.
The resulting file,
.BR <name>.icu ,
can then be read directly by ICU, or used by
.BR pkgdata (8)
for incorporation into a larger archive or library.
.LP
The files read by
.B gensprep
are described in the
.B FILES
section.
.SH OPTIONS
.TP
.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
Print help about usage and exit.
.TP
.BR "\-v\fP, \fB\-\-verbose"
Display extra informative messages during execution.
.TP
.BI "\-c\fP, \fB\-\-copyright"
Include a copyright notice into the binary data.
.TP
.BI "\-s\fP, \fB\-\-sourcedir" " source"
Set the source directory to
.IR source .
The default source directory is specified by the environment variable
.BR ICU_DATA .
.TP
.BI "\-d\fP, \fB\-\-destdir" " destination"
Set the destination directory to
.IR destination .
The default destination directory is specified by the environment variable
.BR ICU_DATA .
.SH ENVIRONMENT
.TP 10
.B ICU_DATA
Specifies the directory containing ICU data. Defaults to
.BR @thepkgicudatadir@/@PACKAGE@/@VERSION@/ .
Some tools in ICU depend on the presence of the trailing slash. It is thus
important to make sure that it is present if
.B ICU_DATA
is set.
.SH FILES
The following files are read by
.B gensprep
and are looked for in the
.I source
/misc for rfc3454_*.txt files and in
.I source
/unidata for NormalizationCorrections.txt.
.TP 20
.B rfc3454_A_1.txt
Contains the list of unassigned codepoints in Unicode version 3.2.0.\|.\|..
.TP
.B rfc3454_B_1.txt
Contains the list of code points that are commonly mapped to nothing.\|.\|..
.TP
.B rfc3454_B_2.txt
Contains the list of mappings for casefolding of code points when Normalization form NFKC is specified.\|.\|..
.TP
.B rfc3454_C_X.txt
Contains the list of code points that are prohibited for IDNA.
.TP
.B NormalizationCorrections.txt
Contains the list of code points whose normalization has changed since Unicode Version 3.2.0.
.SH VERSION
@VERSION@
.SH COPYRIGHT
Copyright (C) 2000-2002 IBM, Inc. and others.
.SH SEE ALSO
.BR pkgdata (8)

View file

@ -0,0 +1,425 @@
/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: gensprep.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003-02-06
* created by: Ram Viswanadha
*
* This program reads the Profile.txt files,
* parses them, and extracts the data for StringPrep profile.
* It then preprocesses it and writes a binary file for efficient use
* in various StringPrep conversion processes.
*/
#include <stdio.h>
#include <stdlib.h>
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/putil.h"
#include "cmemory.h"
#include "cstring.h"
#include "unicode/udata.h"
#include "unewdata.h"
#include "uoptions.h"
#include "uparse.h"
#include "unicode/uset.h"
#include "uprops.h"
#include "sprpimpl.h"
U_CDECL_BEGIN
#include "gensprep.h"
U_CDECL_END
#ifdef WIN32
# pragma warning(disable: 4100)
#endif
UBool beVerbose=FALSE, haveCopyright=TRUE;
#define NORM_CORRECTIONS_FILE_NAME "NormalizationCorrections.txt"
/* prototypes --------------------------------------------------------------- */
static void
parseMappings(const char *filename, UBool reportError, UErrorCode *pErrorCode);
static void
parseNormalizationCorrections(const char *filename, UErrorCode *pErrorCode);
static void
printMapping(UChar32 cp,UChar32* mapping, int32_t mappingLength);
static const char *UNIDATA_DIR = "unidata";
static const char *MISC_DIR = "misc";
/* -------------------------------------------------------------------------- */
/*
 * Command line options understood by the tool.
 * The entries are addressed by position through the enum that follows
 * (options[DESTDIR], options[NORMALIZE], ...), so the two lists have to be
 * kept in the same order.
 */
static UOption options[]={
UOPTION_HELP_H,
UOPTION_HELP_QUESTION_MARK,
UOPTION_VERBOSE,
UOPTION_COPYRIGHT,
UOPTION_DESTDIR,
UOPTION_SOURCEDIR,
UOPTION_PACKAGE_NAME,
UOPTION_BUNDLE_NAME,
{ "normalization", NULL, NULL, NULL, 'n', UOPT_REQUIRES_ARG, 0 },
{ "check-bidi", NULL, NULL, NULL, 'k', UOPT_NO_ARG, 0},
{ "unicode", NULL, NULL, NULL, 'u', UOPT_REQUIRES_ARG, 0 },
};
/* indexes into options[]; one enumerator per array entry, in array order */
enum{
HELP,
HELP_QUESTION_MARK,
VERBOSE,
COPYRIGHT,
DESTDIR,
SOURCEDIR,
PACKAGE_NAME,
BUNDLE_NAME,
NORMALIZE,
CHECK_BIDI,
UNICODE_VERSION
};
/**
 * Prints the usage text to stderr.
 *
 * @param argc argument count; main() passes the value returned by
 *             u_parseArgs(), which is negative when an argument was rejected
 * @param argv argument vector; argv[0] is printed as the program name
 * @return U_ILLEGAL_ARGUMENT_ERROR if argc is negative, otherwise U_ZERO_ERROR
 */
static int printHelp(int argc, char* argv[]){
    /*
     * Broken into chunks because the C89 standard says the minimum
     * required supported string length is 509 bytes.
     */
    fprintf(stderr,
        "Usage: %s [-options] [file_name]\n"
        "\n"
        "Read the files specified and\n"
        "create a binary file [package-name]_[bundle-name]." DATA_TYPE " with the StringPrep profile data\n"
        "\n",
        argv[0]);
    fprintf(stderr,
        "Options:\n"
        "\t-h or -? or --help print this usage text\n"
        "\t-v or --verbose verbose output\n"
        "\t-c or --copyright include a copyright notice\n");
    /* the long names printed here have to match the names registered in options[] */
    fprintf(stderr,
        "\t-d or --destdir destination directory, followed by the path\n"
        "\t-s or --sourcedir source directory of ICU data, followed by the path\n"
        "\t-b or --bundle-name generate the output data file with the name specified\n"
        "\t-p or --package-name prepend the output data file name with the package name specified\n"
        "\t-n or --normalization turn on the option for normalization and include mappings\n"
        "\t from NormalizationCorrections.txt from the given path,\n"
        "\t e.g: /test/icu/source/data/unidata\n"
        "\t-k or --check-bidi turn on the option for checking for BiDi in the profile\n"
        "\t-u or --unicode version of Unicode to be used with this profile followed by the version\n"
        );
    return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
}
extern int
main(int argc, char* argv[]) {
#if !UCONFIG_NO_IDNA
char* filename = NULL;
#endif
const char *srcDir=NULL, *destDir=NULL, *icuUniDataDir=NULL;
const char *packageName=NULL, *bundleName=NULL, *inputFileName = NULL;
char *basename=NULL;
int32_t sprepOptions = 0;
UErrorCode errorCode=U_ZERO_ERROR;
U_MAIN_INIT_ARGS(argc, argv);
/* preset then read command line options */
options[DESTDIR].value=u_getDataDirectory();
options[SOURCEDIR].value="";
options[UNICODE_VERSION].value="0"; /* don't assume the unicode version */
options[BUNDLE_NAME].value = DATA_NAME;
options[PACKAGE_NAME].value = U_ICUDATA_NAME;
options[NORMALIZE].value = "";
argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
/* error handling, printing usage message */
if(argc<0) {
fprintf(stderr,
"error in command line argument \"%s\"\n",
argv[-argc]);
}
if(argc<0 || options[HELP].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
return printHelp(argc, argv);
}
/* get the options values */
beVerbose=options[VERBOSE].doesOccur;
haveCopyright=options[COPYRIGHT].doesOccur;
srcDir=options[SOURCEDIR].value;
destDir=options[DESTDIR].value;
packageName = options[PACKAGE_NAME].value;
bundleName = options[BUNDLE_NAME].value;
icuUniDataDir = options[NORMALIZE].value;
if(argc<2) {
/* print the help message */
return printHelp(argc, argv);
} else {
inputFileName = argv[1];
}
if(!options[UNICODE_VERSION].doesOccur){
return printHelp(argc, argv);
}
#if UCONFIG_NO_IDNA
fprintf(stderr,
"gensprep writes dummy " U_ICUDATA_NAME "_" _SPREP_DATA_NAME "." DATA_TYPE
" because UCONFIG_NO_IDNA is set, \n"
"see icu/source/common/unicode/uconfig.h\n");
generateData(destDir);
#else
setUnicodeVersion(options[UNICODE_VERSION].value);
filename = (char* ) uprv_malloc(uprv_strlen(srcDir) + 300); /* hopefully this should be enough */
/* prepare the filename beginning with the source dir */
if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){
filename[0] = 0x2E;
filename[1] = U_FILE_SEP_CHAR;
uprv_strcpy(filename+2,srcDir);
}else{
uprv_strcpy(filename, srcDir);
}
basename=filename+uprv_strlen(filename);
if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
*basename++=U_FILE_SEP_CHAR;
}
/* initialize */
init();
/* process the file */
uprv_strcpy(basename,inputFileName);
parseMappings(filename,FALSE, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "Could not open file %s for reading. Error: %s \n", filename, u_errorName(errorCode));
return errorCode;
}
if(options[NORMALIZE].doesOccur){
/* set up directory for NormalizationCorrections.txt */
uprv_strcpy(filename,icuUniDataDir);
basename=filename+uprv_strlen(filename);
if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
*basename++=U_FILE_SEP_CHAR;
}
*basename++=U_FILE_SEP_CHAR;
uprv_strcpy(basename,NORM_CORRECTIONS_FILE_NAME);
parseNormalizationCorrections(filename,&errorCode);
if(U_FAILURE(errorCode)){
fprintf(stderr,"Could not open file %s for reading \n", filename);
return errorCode;
}
sprepOptions |= _SPREP_NORMALIZATION_ON;
}
if(options[CHECK_BIDI].doesOccur){
sprepOptions |= _SPREP_CHECK_BIDI_ON;
}
setOptions(sprepOptions);
/* process parsed data */
if(U_SUCCESS(errorCode)) {
/* write the data file */
generateData(destDir, packageName, bundleName);
cleanUpData();
}
uprv_free(filename);
#endif
return errorCode;
}
#if !UCONFIG_NO_IDNA
/**
 * Line callback for NormalizationCorrections.txt.
 * Field 0 is the code point, field 1 the original decomposition, field 3 the
 * version string; field 2 (the corrected decomposition) is not used.
 * The code point is stored as a USPREP_MAP mapping to the field-1 sequence
 * when the version in field 3 is newer than 3.2 (only major and minor are
 * compared). The version is handed to setUnicodeVersionNC() for every line.
 * Any parse problem is reported on stderr and terminates the program.
 *
 * @param context    unused
 * @param fields     start/limit pointers of the fields of the current line
 * @param fieldCount unused
 * @param pErrorCode in/out ICU error code
 */
static void U_CALLCONV
normalizationCorrectionsLineFn(void *context,
                    char *fields[][2], int32_t fieldCount,
                    UErrorCode *pErrorCode) {
    uint32_t mapping[40];
    char *end, *s;
    uint32_t code;
    int32_t length;
    UVersionInfo version;
    UVersionInfo thisVersion;

    /* get the character code, field 0 */
    code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
    if(end<=fields[0][0]) {
        /* nothing was consumed: the field does not start with a hex number */
        *pErrorCode=U_PARSE_ERROR;
    }
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "gensprep: error parsing NormalizationCorrections.txt mapping at %s\n", fields[0][0]);
        exit(*pErrorCode);
    }
    /* Original (erroneous) decomposition */
    s = fields[1][0];

    /* parse the mapping string */
    length=u_parseCodePoints(s, mapping, sizeof(mapping)/sizeof(mapping[0]), pErrorCode);

    /* ignore corrected decomposition */

    u_versionFromString(version,fields[3][0] );
    u_versionFromString(thisVersion, "3.2.0");

    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "gensprep error parsing NormalizationCorrections.txt of U+%04lx - %s\n",
                (long)code, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }

    /* store the mapping */
    if( version[0] > thisVersion[0] ||
        ((version[0]==thisVersion[0]) && (version[1] > thisVersion[1]))
        ){
        storeMapping(code,mapping, length, USPREP_MAP, pErrorCode);
    }
    setUnicodeVersionNC(version);
}
/**
 * Feeds every line of the given NormalizationCorrections file to
 * normalizationCorrectionsLineFn().
 * Does nothing if pErrorCode is NULL or already holds a failure.
 * A failure other than U_FILE_ACCESS_ERROR is reported on stderr and
 * terminates the program; U_FILE_ACCESS_ERROR is left in *pErrorCode for the
 * caller to handle.
 *
 * @param filename   path of the file to read
 * @param pErrorCode in/out ICU error code
 */
static void
parseNormalizationCorrections(const char *filename, UErrorCode *pErrorCode) {
    char *fields[4][2];

    if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) {
        u_parseDelimitedFile(filename, ';', fields, 4, normalizationCorrectionsLineFn, NULL, pErrorCode);

        /* fprintf(stdout,"Number of code points that have NormalizationCorrections mapping with length >1 : %i\n",len); */

        if(U_SUCCESS(*pErrorCode) || *pErrorCode==U_FILE_ACCESS_ERROR) {
            return;
        }
        fprintf(stderr, "gensprep error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }
}
/**
 * Line callback for a StringPrep profile file.
 * Field 2 names the entry type. UNASSIGNED, PROHIBITED and LABEL_SEPARATOR
 * entries carry a code point or code point range in field 0 and are stored
 * with storeRange(). MAP entries carry a single code point in field 0 and the
 * mapped sequence in field 1 and are stored with storeMapping().
 * A field-2 value that names none of these types is U_INVALID_FORMAT_ERROR.
 * A range that cannot be parsed is reported and leaves the failure in
 * *pErrorCode; every other failure is reported on stderr and terminates the
 * program.
 *
 * @param context    the name of the file being parsed, used in messages
 * @param fields     start/limit pointers of the fields of the current line
 * @param fieldCount unused
 * @param pErrorCode in/out ICU error code
 */
static void U_CALLCONV
strprepProfileLineFn(void *context,
              char *fields[][2], int32_t fieldCount,
              UErrorCode *pErrorCode) {
    uint32_t mapping[40];
    char *end, *map;
    uint32_t code;
    int32_t length;
    /*UBool* mapWithNorm = (UBool*) context;*/
    const char* typeName;
    uint32_t rangeStart=0,rangeEnd =0;
    const char* filename = (const char*) context;

    typeName = fields[2][0];
    map = fields[1][0];

    if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){

        u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
            return;
        }

        /* store the range */
        storeRange(rangeStart,rangeEnd,USPREP_UNASSIGNED, pErrorCode);

    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){

        u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
            return;
        }

        /* store the range */
        storeRange(rangeStart,rangeEnd,USPREP_PROHIBITED, pErrorCode);

    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){
        /* get the character code, field 0 */
        code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
        /* the number has to fill the field exactly: something parsed, nothing left over */
        if(end<=fields[0][0] || end!=fields[0][1]) {
            fprintf(stderr, "gensprep: syntax error in field 0 at %s\n", fields[0][0]);
            *pErrorCode=U_PARSE_ERROR;
            exit(U_PARSE_ERROR);
        }

        /* parse the mapping string */
        length=u_parseCodePoints(map, mapping, sizeof(mapping)/sizeof(mapping[0]), pErrorCode);

        /* store the mapping */
        storeMapping(code,mapping, length,USPREP_MAP, pErrorCode);

    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_LABEL_SEPARATOR])!=NULL){

        u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
            return;
        }

        /* store the range */
        storeRange(rangeStart,rangeEnd,USPREP_LABEL_SEPARATOR, pErrorCode);

    }else{
        *pErrorCode = U_INVALID_FORMAT_ERROR;
    }

    if(U_FAILURE(*pErrorCode)) {
        /* one conversion per argument, so that the error name is actually printed */
        fprintf(stderr, "gensprep error parsing %s line %s at %s. Error: %s\n",filename,
               fields[0][0],fields[2][0],u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }

}
/**
 * Feeds every line of the given profile file to strprepProfileLineFn().
 * Does nothing if pErrorCode is NULL or already holds a failure.
 * After parsing, a failure is reported on stderr and terminates the program,
 * except that U_FILE_ACCESS_ERROR is left in *pErrorCode for the caller when
 * reportError is FALSE.
 *
 * @param filename    path of the file to read; also passed to the line
 *                    callback as its context
 * @param reportError if TRUE, a file access error is fatal as well
 * @param pErrorCode  in/out ICU error code
 */
static void
parseMappings(const char *filename, UBool reportError, UErrorCode *pErrorCode) {
    char *fields[3][2];

    if(pErrorCode==NULL) {
        return;
    }
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode);

    /*fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len);*/

    if(U_SUCCESS(*pErrorCode)) {
        return;
    }
    if(!reportError && *pErrorCode==U_FILE_ACCESS_ERROR) {
        /* a missing file is left for the caller to handle */
        return;
    }
    fprintf(stderr, "gensprep error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
    exit(*pErrorCode);
}
#endif /* #if !UCONFIG_NO_IDNA */
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/

View file

@ -0,0 +1,206 @@
# Microsoft Developer Studio Project File - Name="gensprep" - Package Owner=<4>
# Microsoft Developer Studio Generated Build File, Format Version 6.00
# ** DO NOT EDIT **
# TARGTYPE "Win32 (x86) Console Application" 0x0103
CFG=gensprep - Win32 Debug
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
!MESSAGE use the Export Makefile command and run
!MESSAGE
!MESSAGE NMAKE /f "gensprep.mak".
!MESSAGE
!MESSAGE You can specify a configuration when running NMAKE
!MESSAGE by defining the macro CFG on the command line. For example:
!MESSAGE
!MESSAGE NMAKE /f "gensprep.mak" CFG="gensprep - Win32 Debug"
!MESSAGE
!MESSAGE Possible choices for configuration are:
!MESSAGE
!MESSAGE "gensprep - Win32 Release" (based on "Win32 (x86) Console Application")
!MESSAGE "gensprep - Win32 Debug" (based on "Win32 (x86) Console Application")
!MESSAGE "gensprep - Win64 Release" (based on "Win32 (x86) Console Application")
!MESSAGE "gensprep - Win64 Debug" (based on "Win32 (x86) Console Application")
!MESSAGE
# Begin Project
# PROP AllowPerConfigDependencies 0
# PROP Scc_ProjName ""
# PROP Scc_LocalPath ""
CPP=cl.exe
RSC=rc.exe
!IF "$(CFG)" == "gensprep - Win32 Release"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 0
# PROP BASE Output_Dir "Release"
# PROP BASE Intermediate_Dir "Release"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
# PROP Output_Dir "Release"
# PROP Intermediate_Dir "Release"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
MTL=midl.exe
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
# ADD CPP /nologo /W3 /GX /O2 /I "..\..\common" /I "..\toolutil" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
# ADD LINK32 icuuc.lib icutu.lib /nologo /subsystem:console /machine:I386 /libpath:"..\..\..\lib"
# Begin Custom Build
TargetPath=.\Release\gensprep.exe
InputPath=.\Release\gensprep.exe
InputName=gensprep
SOURCE="$(InputPath)"
"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(TargetPath) ..\..\..\bin
# End Custom Build
!ELSEIF "$(CFG)" == "gensprep - Win32 Debug"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir "Debug"
# PROP BASE Intermediate_Dir "Debug"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir "Debug"
# PROP Intermediate_Dir "Debug"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
MTL=midl.exe
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
# ADD CPP /nologo /W3 /Gm /GX /ZI /Od /I "..\..\common" /I "..\toolutil" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
# ADD LINK32 icutud.lib icuucd.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\..\..\lib"
# Begin Custom Build
TargetPath=.\Debug\gensprep.exe
InputPath=.\Debug\gensprep.exe
InputName=gensprep
SOURCE="$(InputPath)"
"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(TargetPath) ..\..\..\bin
# End Custom Build
!ELSEIF "$(CFG)" == "gensprep - Win64 Release"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 0
# PROP BASE Output_Dir "Release"
# PROP BASE Intermediate_Dir "Release"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
# PROP Output_Dir "Release"
# PROP Intermediate_Dir "Release"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
MTL=midl.exe
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
# ADD CPP /nologo /W3 /GX /Zi /O2 /Op /I "..\..\common" /I "..\toolutil" /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /QIA64_fmaopt /Zm600 /c
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:IX86 /machine:IA64
# ADD LINK32 icuuc.lib icutu.lib /nologo /subsystem:console /machine:IX86 /libpath:"..\..\..\lib" /machine:IA64
# Begin Custom Build
TargetPath=.\Release\gensprep.exe
InputPath=.\Release\gensprep.exe
InputName=gensprep
SOURCE="$(InputPath)"
"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(TargetPath) ..\..\..\bin
# End Custom Build
!ELSEIF "$(CFG)" == "gensprep - Win64 Debug"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir "Debug"
# PROP BASE Intermediate_Dir "Debug"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir "Debug"
# PROP Intermediate_Dir "Debug"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
MTL=midl.exe
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /Op /I "..\..\common" /I "..\toolutil" /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /GZ /QIA64_fmaopt /Zm600 /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:IX86 /pdbtype:sept /machine:IA64
# ADD LINK32 icutud.lib icuucd.lib /nologo /subsystem:console /incremental:no /debug /machine:IX86 /pdbtype:sept /libpath:"..\..\..\lib" /machine:IA64
# Begin Custom Build
TargetPath=.\Debug\gensprep.exe
InputPath=.\Debug\gensprep.exe
InputName=gensprep
SOURCE="$(InputPath)"
"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(TargetPath) ..\..\..\bin
# End Custom Build
!ENDIF
# Begin Target
# Name "gensprep - Win32 Release"
# Name "gensprep - Win32 Debug"
# Name "gensprep - Win64 Release"
# Name "gensprep - Win64 Debug"
# Begin Group "Source Files"
# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
# Begin Source File
SOURCE=.\gensprep.c
# End Source File
# Begin Source File
SOURCE=.\store.c
# End Source File
# End Group
# Begin Group "Header Files"
# PROP Default_Filter "h;hpp;hxx;hm;inl"
# Begin Source File
SOURCE=.\gensprep.h
# End Source File
# End Group
# Begin Group "Resource Files"
# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
# End Group
# End Target
# End Project

View file

@ -0,0 +1,82 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: gensprep.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003-02-06
* created by: Ram Viswanadha
*/
#ifndef __GENIDN_H__
#define __GENIDN_H__
#include "unicode/utypes.h"
#include "unicode/uset.h"
#include "sprpimpl.h"
/* file definitions */
#define DATA_NAME "sprep"
#define DATA_TYPE "spp"
/*
* data structure that holds the IDN properties for one or more
* code point(s) at build time
*/
/* global flags */
extern UBool beVerbose, haveCopyright;
/* prototypes (implemented in store.c) */
/* Parses the version string v and stores it as the dataVersion of the output file's UDataInfo. */
extern void
setUnicodeVersion(const char *v);
/* Packs the four fields of version into one 32-bit word kept in the indexes[] header array. */
extern void
setUnicodeVersionNC(UVersionInfo version);
/* Allocates and opens the build-time trie; exits the tool on failure. Call before any store*() function. */
extern void
init(void);
/*
 * Records that codepoint maps to the `length` code points in mapping[].
 * Exits the tool if the code point already carries a type other than USPREP_PROHIBITED.
 */
extern void
storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length, UStringPrepType type, UErrorCode* status);
/* Marks every code point in the inclusive range start..end with the given type. */
extern void
storeRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status);
/*
 * Serializes the collected data and writes it to dataDir as
 * "<packageName>_<bundleName>" with type DATA_TYPE; exits the tool on error.
 */
extern void
generateData(const char *dataDir, const char* packageName, const char* bundleName);
/* Stores the options word in the indexes[] header array. */
extern void
setOptions(int32_t options);
/* Closes and frees the build-time trie created by init(). */
extern void
cleanUpData(void);
/*
extern void
storeIDN(uint32_t code, IDN *idn);
extern void
processData(void);
*/
#endif
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/

View file

@ -0,0 +1,153 @@
<?xml version="1.0" encoding = "Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="7.00"
Name="gensprep"
SccProjectName=""
SccLocalPath="">
<Platforms>
<Platform
Name="Win32"/>
</Platforms>
<Configurations>
<Configuration
Name="Release|Win32"
OutputDirectory=".\Release"
IntermediateDirectory=".\Release"
ConfigurationType="1"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="FALSE"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
InlineFunctionExpansion="2"
ImproveFloatingPointConsistency="TRUE"
AdditionalIncludeDirectories="..\..\common,..\toolutil"
PreprocessorDefinitions="WIN32,NDEBUG,_CONSOLE"
StringPooling="TRUE"
RuntimeLibrary="4"
EnableFunctionLevelLinking="TRUE"
PrecompiledHeaderFile=".\Release/gensprep.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="3"
SuppressStartupBanner="TRUE"
CompileAs="0"/>
<Tool
Name="VCCustomBuildTool"
CommandLine="copy $(TargetPath) ..\..\..\bin
"
Outputs="..\..\..\bin\$(InputName).exe"/>
<Tool
Name="VCLinkerTool"
AdditionalOptions="/MACHINE:I386"
AdditionalDependencies="icuuc.lib icutu.lib"
OutputFile=".\Release/gensprep.exe"
LinkIncremental="1"
SuppressStartupBanner="TRUE"
AdditionalLibraryDirectories="..\..\..\lib"
ProgramDatabaseFile=".\Release/gensprep.pdb"
SubSystem="1"/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Release/gensprep.tlb"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1033"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
</Configuration>
<Configuration
Name="Debug|Win32"
OutputDirectory=".\Debug"
IntermediateDirectory=".\Debug"
ConfigurationType="1"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="FALSE"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\..\common,..\toolutil"
PreprocessorDefinitions="WIN32,_DEBUG,_CONSOLE"
BasicRuntimeChecks="3"
RuntimeLibrary="5"
PrecompiledHeaderFile=".\Debug/gensprep.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="3"
SuppressStartupBanner="TRUE"
DebugInformationFormat="4"
CompileAs="0"/>
<Tool
Name="VCCustomBuildTool"
CommandLine="copy $(TargetPath) ..\..\..\bin
"
Outputs="..\..\..\bin\$(InputName).exe"/>
<Tool
Name="VCLinkerTool"
AdditionalOptions="/MACHINE:I386"
AdditionalDependencies="icutud.lib icuucd.lib"
OutputFile=".\Debug/gensprep.exe"
LinkIncremental="2"
SuppressStartupBanner="TRUE"
AdditionalLibraryDirectories="..\..\..\lib"
GenerateDebugInformation="TRUE"
ProgramDatabaseFile=".\Debug/gensprep.pdb"
SubSystem="1"/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Debug/gensprep.tlb"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1033"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
</Configuration>
</Configurations>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat">
<File
RelativePath=".\gensprep.c">
</File>
<File
RelativePath=".\store.c">
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl">
<File
RelativePath=".\gensprep.h">
</File>
</Filter>
<Filter
Name="Resource Files"
Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe">
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View file

@ -0,0 +1,608 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: store.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003-02-06
* created by: Ram Viswanadha
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "cmemory.h"
#include "cstring.h"
#include "filestrm.h"
#include "unicode/udata.h"
#include "utrie.h"
#include "unicode/uset.h"
#include "unewdata.h"
#include "gensprep.h"
#include "uhash.h"
#ifdef WIN32
# pragma warning(disable: 4100)
#endif
#define DO_DEBUG_OUT 0
/**
This is a simple Trie with the following structure
16-bit USPREP sets:
if(trieWord >= 0xFFF0){
UStringPrepType enum = trieWord - 0xFFF0;
}else{
Bit
0 ON: USPREP_PROHIBITED
1 OFF: bits 2..15 (14 bits) contain the delta
ON: bits 2..15 (14 bits) contain the index into the mapping array
2..15 Contain the index into the mapping array or the delta
}
*/
/* file data ---------------------------------------------------------------- */
/* indexes[] value names */
#if UCONFIG_NO_IDNA
/* dummy UDataInfo cf. udata.h */
static UDataInfo dataInfo = {
sizeof(UDataInfo),
0,
U_IS_BIG_ENDIAN,
U_CHARSET_FAMILY,
U_SIZEOF_UCHAR,
0,
{ 0, 0, 0, 0 }, /* dummy dataFormat */
{ 0, 0, 0, 0 }, /* dummy formatVersion */
{ 0, 0, 0, 0 } /* dummy dataVersion */
};
#else
/* header words written first into the data file; slots are addressed with the _SPREP_* index constants */
static int32_t indexes[_SPREP_INDEX_TOP]={ 0 };
/* UTF-16 code units of all table-stored mappings; allocated and filled by storeMappingData() */
static uint16_t* mappingData= NULL;
/*
 * number of code units to allocate for mappingData; storeMapping() adds the
 * length of every table-stored mapping, plus one extra unit for mappings
 * longer than _SPREP_MAX_INDEX_TOP_LENGTH (their length is written as a prefix)
 */
static int32_t mappingDataCapacity = 0;
/* next write position in mappingData (not in the trie) while storeMappingData() runs */
static int16_t currentIndex = 0;
static int32_t maxLength = 0; /* maximum length (in UTF-16 code units) of a table-stored mapping */
/*
 * UDataInfo cf. udata.h
 * Header descriptor handed to udata_create() by generateData().
 * The dataVersion below is only a default: setUnicodeVersion() overwrites it.
 */
static UDataInfo dataInfo={
sizeof(UDataInfo),
0,
U_IS_BIG_ENDIAN,
U_CHARSET_FAMILY,
U_SIZEOF_UCHAR,
0,
{ 0x53, 0x50, 0x52, 0x50 }, /* dataFormat="SPRP" */
{ 3, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */
{ 3, 2, 0, 0 } /* dataVersion (Unicode version) */
};
/*
 * Convert the version string v with u_versionFromString() and record the
 * result as the dataVersion of the file header (dataInfo).
 */
void
setUnicodeVersion(const char *v) {
    UVersionInfo parsed;

    u_versionFromString(parsed, v);

    /* dataVersion is the four-element version field of dataInfo */
    uprv_memcpy(dataInfo.dataVersion, parsed, 4);
}
/*
 * Pack the four fields of version into one 32-bit word
 * (version[0] in the most significant byte) and store it in
 * indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION].
 */
void
setUnicodeVersionNC(UVersionInfo version){
    /*
     * Widen each field to uint32_t before shifting: shifting the promoted
     * (signed) int left by 24 overflows for field values of 0x80 and above.
     */
    uint32_t univer = (uint32_t)version[0] << 24;
    univer += (uint32_t)version[1] << 16;
    univer += (uint32_t)version[2] << 8;
    univer += (uint32_t)version[3];
    indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION] = univer;
}
/* build-time trie holding one trie word per code point; allocated and opened by init() */
static UNewTrie *sprepTrie;
/* maximum data length passed to utrie_open() in init() */
#define MAX_DATA_LENGTH 11500
/*
 * inclusive bounds of a (code point - mapped code point) delta that
 * storeMapping() will store directly in the trie word
 */
#define SPREP_DELTA_RANGE_POSITIVE_LIMIT 8191
#define SPREP_DELTA_RANGE_NEGATIVE_LIMIT -8192
/*
 * Allocate and open the build-time trie (sprepTrie).
 * Must run before storeMapping()/storeRange(); the tool exits with
 * U_MEMORY_ALLOCATION_ERROR if the trie cannot be allocated or opened.
 */
extern void
init(void) {
    sprepTrie = (UNewTrie *)uprv_malloc(sizeof(UNewTrie));
    if(sprepTrie == NULL) {
        /* do not hand a NULL pointer to uprv_memset()/utrie_open() */
        fprintf(stderr, "error: failed to allocate memory for the trie\n");
        exit(U_MEMORY_ALLOCATION_ERROR);
    }
    uprv_memset(sprepTrie, 0, sizeof(UNewTrie));

    /* initialize the trie */
    if(NULL==utrie_open(sprepTrie, NULL, MAX_DATA_LENGTH, 0, 0, FALSE)) {
        fprintf(stderr, "error: failed to initialize tries\n");
        exit(U_MEMORY_ALLOCATION_ERROR);
    }
}
/*
 * code point -> ValueStruct for every mapping that storeMapping() could not
 * encode directly in the trie word; created on first use by storeMapping()
 * and closed at the end of generateData()
 */
static UHashtable* hashTable = NULL;
/* one pending mapping, kept in hashTable until storeMappingData() copies it into mappingData */
struct ValueStruct {
UChar* mapping; /* UTF-16 code units of the mapping; released by valueDeleter() */
int16_t length; /* number of UTF-16 code units in mapping */
UStringPrepType type; /* the type that was passed to storeMapping() */
};
typedef struct ValueStruct ValueStruct;
/*
 * Hashtable value deleter: obj is a ValueStruct created by storeMapping().
 * Frees the mapping string it owns and then the record itself.
 */
void U_CALLCONV valueDeleter(void* obj){
    /* the owned string has to go before the record that points at it */
    uprv_free(((ValueStruct*) obj)->mapping);
    uprv_free(obj);
}
/* Hashtable key hasher: keys are integers (code points), so the key is its own hash code */
static int32_t U_CALLCONV hashEntry(const UHashTok parm) {
    const int32_t hashCode = parm.integer;
    return hashCode;
}
/*
 * Hashtable key comparator for integer (code point) keys.
 * A uhash key comparator must return TRUE when the two keys are EQUAL.
 * The previous implementation returned TRUE when they differed, so a key
 * never matched itself and a repeated uhash_iput() for the same code point
 * added a second entry instead of replacing the first one.
 */
static UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) {
    return (UBool)(p1.integer == p2.integer);
}
/*
 * Copy every mapping collected in hashTable into the mappingData array and
 * point the trie at it.
 *
 * The hashtable is walked once per mapping length, starting at length 1 and
 * increasing by one after each complete pass, so mappingData ends up grouped
 * by mapping length. For each element whose length equals the length of the
 * current pass:
 *   - the trie word for its code point is set to (currentIndex << 2) + 0x02
 *     (plus 0x01 if the code point was already marked USPREP_PROHIBITED);
 *   - for lengths above _SPREP_MAX_INDEX_TOP_LENGTH the length itself is
 *     written as a prefix code unit;
 *   - the mapping's code units are appended at currentIndex.
 * The start position of each length group is recorded in indexes[].
 * Any inconsistency (conflicting type, index overflow, allocation failure
 * in the growth path) terminates the tool via exit().
 *
 * NOTE(review): hashTable is only created inside storeMapping(); this function
 * passes it to uhash_count() without a NULL check - confirm that a profile
 * without table-stored mappings cannot reach this point with hashTable==NULL.
 */
static void
storeMappingData(){
int32_t pos = -1;
const UHashElement* element = NULL;
ValueStruct* value = NULL;
int32_t codepoint = 0;
int32_t elementCount = uhash_count(hashTable);
int32_t writtenElementCount = 0;
int32_t mappingLength = 1; /* minimum mapping length */
int32_t oldMappingLength = 0;
uint16_t trieWord =0;
int32_t limitIndex = 0;
/*initialize the mapping data */
/* NOTE(review): the result of uprv_malloc() is used by uprv_memset() without a NULL check */
mappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * (mappingDataCapacity));
uprv_memset(mappingData,0,U_SIZEOF_UCHAR * mappingDataCapacity);
/* outer loop: one pass over the whole hashtable per mapping length, until every element is written */
while(writtenElementCount < elementCount){
while( (element = uhash_nextElement(hashTable, &pos))!=NULL){
codepoint = element->key.integer;
value = (ValueStruct*)element->value.pointer;
/* store the start of indexes */
/* true only for the first element visited in a pass with a new mappingLength */
if(oldMappingLength != mappingLength){
/* Assume that index[] is used according to the enums defined */
if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){
indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex;
}
/* remember where the first group with an explicit length prefix starts */
if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH &&
mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){
limitIndex = currentIndex;
}
oldMappingLength = mappingLength;
}
/* only elements of the current pass's length are written now; the others wait for their pass */
if(value->length == mappingLength){
uint32_t savedTrieWord = 0;
trieWord = currentIndex << 2;
/* turn on the 2nd bit to signal that the following bits contain an index */
trieWord += 0x02;
if(trieWord > _SPREP_TYPE_THRESHOLD){
fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
exit(U_ILLEGAL_CHAR_FOUND);
}
/* figure out if the code point has type already stored */
savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
if(savedTrieWord!=0){
if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
/* turn on the first bit in trie word */
trieWord += 0x01;
}else{
/*
* the codepoint has value something other than prohibited
* and a mapping .. error!
*/
fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", codepoint);
exit(U_ILLEGAL_ARGUMENT_ERROR);
}
}
/* now set the value in the trie */
if(!utrie_set32(sprepTrie,codepoint,trieWord)){
fprintf(stderr,"Could not set the value for code point.\n");
exit(U_ILLEGAL_ARGUMENT_ERROR);
}
/* written the trie word for the codepoint... increment the count*/
writtenElementCount++;
/* sanity check are we exceeding the max number allowed */
/* NOTE(review): the message prints currentIndex+length while the test uses currentIndex+length+1 */
if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){
fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n", currentIndex+value->length, _SPREP_MAX_INDEX_VALUE);
exit(U_INDEX_OUTOFBOUNDS_ERROR);
}
/* copy the mapping data */
if(currentIndex+value->length+1 <= mappingDataCapacity){
/* write the length */
if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
/* the cast here is safe since we donot expect the length to be > 65535 */
mappingData[currentIndex++] = (uint16_t) mappingLength;
}
/* copy the contents to mappindData array */
uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
currentIndex += value->length;
}else{
/* realloc */
/* grow by doubling: allocate a new buffer, copy the old contents over, free the old buffer */
UChar* newMappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * mappingDataCapacity*2);
if(newMappingData == NULL){
fprintf(stderr, "Could not realloc the mapping data!\n");
exit(U_MEMORY_ALLOCATION_ERROR);
}
uprv_memmove(newMappingData, mappingData, U_SIZEOF_UCHAR * mappingDataCapacity);
/* mappingDataCapacity is also what generateData() reports and writes as the mapping data size */
mappingDataCapacity *= 2;
uprv_free(mappingData);
mappingData = newMappingData;
/* write the length */
if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
/* the cast here is safe since we donot expect the length to be > 65535 */
mappingData[currentIndex++] = (uint16_t) mappingLength;
}
/* continue copying */
uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
currentIndex += value->length;
}
}
}
/* pass finished: move on to the next mapping length and restart the hashtable iteration */
mappingLength++;
pos = -1;
}
/* set the last length for range check */
if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){
indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1;
}else{
indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex;
}
}
/* Record the options word in the indexes[] header array (slot _SPREP_OPTIONS). */
extern void
setOptions(int32_t options){
    int32_t *optionsSlot = &indexes[_SPREP_OPTIONS];

    *optionsSlot = options;
}
/*
 * Record that codepoint maps to the `length` code points in mapping[].
 *
 * Three encodings are used, chosen by the mapping's length in UTF-16 code units:
 *   - length 0: the trie word becomes (_SPREP_MAX_INDEX_VALUE << 2);
 *   - length 1: if (codepoint - mapping[0]) lies within
 *     SPREP_DELTA_RANGE_NEGATIVE_LIMIT..SPREP_DELTA_RANGE_POSITIVE_LIMIT and the
 *     resulting word is below _SPREP_TYPE_THRESHOLD, the delta is stored
 *     directly in the trie word (delta << 2);
 *   - everything else: the UTF-16 form of the mapping is put into hashTable
 *     and later written out by storeMappingData().
 *
 * @param codepoint the code point being mapped
 * @param mapping   the mapped code points (UTF-32)
 * @param length    number of entries in mapping
 * @param type      stored with table-based mappings
 * @param status    receives hashtable errors; a failure terminates the tool
 *
 * The tool exits if codepoint already has a type other than USPREP_PROHIBITED.
 */
extern void
storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length,
             UStringPrepType type, UErrorCode* status){
    UChar* map = NULL;
    int16_t adjustedLen=0;
    int32_t i;
    int32_t mapIndex;
    uint16_t trieWord = 0;
    ValueStruct *value = NULL;
    uint32_t savedTrieWord = 0;

    /* initialize the hashtable */
    if(hashTable==NULL){
        hashTable = uhash_open(hashEntry, compareEntries, status);
        if(U_FAILURE(*status)){
            /* do not configure or fill a table that could not be created */
            fprintf(stderr, "Failed to open the hashtable. Error: %s\n", u_errorName(*status));
            exit(*status);
        }
        uhash_setValueDeleter(hashTable, valueDeleter);
    }

    /* figure out if the code point has type already stored */
    savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
    if(savedTrieWord!=0){
        if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
            /*
             * turn on the first bit in trie word
             * NOTE(review): this bit is overwritten by the length-0 and delta
             * paths below, and the table path does not use trieWord at all
             * (storeMappingData() re-derives the bit) - confirm whether the
             * direct paths are meant to keep the prohibited flag.
             */
            trieWord += 0x01;
        }else{
            /*
             * the codepoint has value something other than prohibited
             * and a mapping .. error!
             */
            fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", codepoint);
            exit(U_ILLEGAL_ARGUMENT_ERROR);
        }
    }

    /* figure out the real length: supplementary code points need two UTF-16 units */
    for(i=0; i<length; i++){
        if(mapping[i] > 0xFFFF){
            adjustedLen +=2;
        }else{
            adjustedLen++;
        }
    }

    if(adjustedLen == 0){
        trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2);
        /* make sure that the value of trieWord is less than the threshold */
        if(trieWord < _SPREP_TYPE_THRESHOLD){
            /* now set the value in the trie */
            if(!utrie_set32(sprepTrie,codepoint,trieWord)){
                fprintf(stderr,"Could not set the value for code point.\n");
                exit(U_ILLEGAL_ARGUMENT_ERROR);
            }
            /* value is set so just return */
            return;
        }else{
            fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
            exit(U_ILLEGAL_CHAR_FOUND);
        }
    }

    if(adjustedLen == 1){
        /*
         * calculate the delta in 32 bits.
         * A 16-bit delta wraps for large differences (for example a
         * supplementary code point mapped to a BMP character) and could land
         * inside the allowed range with a wrong value.
         */
        int32_t delta = (int32_t)codepoint - (int32_t)mapping[0];
        if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){
            /* shift as unsigned: left-shifting a negative value is not defined */
            trieWord = (uint16_t)((uint32_t)delta << 2);

            /* make sure that the second bit is OFF */
            if((trieWord & 0x02) != 0 ){
                fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n");
                exit(U_INTERNAL_PROGRAM_ERROR);
            }
            /* make sure that the value of trieWord is less than the threshold */
            if(trieWord < _SPREP_TYPE_THRESHOLD){
                /* now set the value in the trie */
                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
                    fprintf(stderr,"Could not set the value for code point.\n");
                    exit(U_ILLEGAL_ARGUMENT_ERROR);
                }
                /* value is set so just return */
                return;
            }
        }
        /*
         * if the delta is not in the given range or if the trieWord is larger than the threshold
         * just fall through for storing the mapping in the mapping table
         */
    }

    map = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (adjustedLen+1));
    uprv_memset(map,0,U_SIZEOF_UCHAR * (adjustedLen+1));

    /*
     * convert to UTF-16 with a separate write index: a supplementary code
     * point takes two units, so the output position runs ahead of the input
     * position and the following unit must not overwrite the trail surrogate
     */
    mapIndex = 0;
    for(i=0; i<length; i++){
        if(mapping[i] <= 0xFFFF){
            map[mapIndex++] = (uint16_t)mapping[i];
        }else{
            map[mapIndex++] = UTF16_LEAD(mapping[i]);
            map[mapIndex++] = UTF16_TRAIL(mapping[i]);
        }
    }

    value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct));
    value->mapping = map;
    value->type = type;
    value->length = adjustedLen;

    /* mappings longer than the top length are written with a one-unit length prefix */
    if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){
        mappingDataCapacity++;
    }
    if(maxLength < value->length){
        maxLength = value->length;
    }

    uhash_iput(hashTable,codepoint,value,status);
    mappingDataCapacity += adjustedLen;

    if(U_FAILURE(*status)){
        fprintf(stderr, "Failed to put entries into the hastable. Error: %s\n", u_errorName(*status));
        exit(*status);
    }
}
/*
 * Mark the inclusive code point range start..end with the given type.
 *
 * The trie word written is _SPREP_TYPE_THRESHOLD + type.
 * For a single code point (start == end) that already carries a mapping
 * word, USPREP_PROHIBITED is merged into that word by turning on bit 0;
 * any other conflict with an existing value terminates the tool.
 * For a real range the value is set with utrie_setRange32() without
 * overwriting existing values.
 *
 * @param status not used by this function
 */
extern void
storeRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status){
    /*
     * Build the word in 32 bits first: the overflow test below can never
     * fire if the sum has already been truncated to 16 bits.
     */
    uint32_t typeWord = (uint32_t)_SPREP_TYPE_THRESHOLD + (uint32_t)type; /* the top 4 bits contain the value */
    uint16_t trieWord = 0;

    if(typeWord > 0xFFFF){
        fprintf(stderr,"trieWord cannot contain value greater than 0xFFFF.\n");
        exit(U_ILLEGAL_CHAR_FOUND);
    }
    trieWord = (uint16_t)typeWord;

    if(start == end){
        uint32_t savedTrieWord = utrie_get32(sprepTrie, start, NULL);
        if(savedTrieWord>0){
            if(savedTrieWord < _SPREP_TYPE_THRESHOLD && type == USPREP_PROHIBITED){
                /*
                 * A mapping is stored in the trie word
                 * and the only other possible type that a
                 * code point can have is USPREP_PROHIBITED
                 *
                 */

                /* turn on the 0th bit in the savedTrieWord */
                savedTrieWord += 0x01;

                /* the downcast is safe since we only save 16 bit values */
                trieWord = (uint16_t)savedTrieWord;

                /* make sure that the value of trieWord is less than the threshold */
                if(trieWord < _SPREP_TYPE_THRESHOLD){
                    /* now set the value in the trie */
                    if(!utrie_set32(sprepTrie,start,trieWord)){
                        fprintf(stderr,"Could not set the value for code point.\n");
                        exit(U_ILLEGAL_ARGUMENT_ERROR);
                    }
                    /* value is set so just return */
                    return;
                }else{
                    fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
                    exit(U_ILLEGAL_CHAR_FOUND);
                }

            }else if(savedTrieWord != trieWord){
                fprintf(stderr,"Value for codepoint \\U%08X already set!.\n", start);
                exit(U_ILLEGAL_ARGUMENT_ERROR);
            }
            /* if savedTrieWord == trieWord .. fall through and set the value */
        }
        if(!utrie_set32(sprepTrie,start,trieWord)){
            fprintf(stderr,"Could not set the value for code point \\U%08X.\n", start);
            exit(U_ILLEGAL_ARGUMENT_ERROR);
        }
    }else{
        /* utrie_setRange32() takes an exclusive limit, hence end+1 */
        if(!utrie_setRange32(sprepTrie, start, end+1, trieWord, FALSE)){
            fprintf(stderr,"Value for certain codepoint already set.\n");
            exit(U_ILLEGAL_CHAR_FOUND);
        }
    }
}
/*
 * Folding callback handed to utrie_serialize().
 * Scans the 0x400 code points beginning at start; as soon as one of them has
 * a non-zero trie value the block's offset is returned as the folded value,
 * otherwise the result is 0.
 */
static uint32_t U_CALLCONV
getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
    const UChar32 blockEnd = start + 0x400;
    UChar32 c = start;
    UBool inBlockZero;

    while(c < blockEnd) {
        const uint32_t value = utrie_get32(trie, c, &inBlockZero);

        if(inBlockZero) {
            /* nothing stored anywhere in this data block: skip it in one step */
            c += UTRIE_DATA_BLOCK_LENGTH;
            continue;
        }
        if(value != 0) {
            return (uint32_t)offset;
        }
        ++c;
    }
    return 0;
}
#endif /* #if !UCONFIG_NO_IDNA */
/*
 * Serialize the collected StringPrep data and write it to
 * "<packageName>_<bundleName>" (type DATA_TYPE) in dataDir.
 *
 * File layout after the udata header: indexes[], the serialized trie,
 * then the mapping data. With UCONFIG_NO_IDNA only an empty data file
 * (header, no payload) is produced.
 * Every failure terminates the tool via exit(); so does a mismatch between
 * the number of bytes written and the calculated size.
 *
 * @param dataDir     output directory passed to udata_create()
 * @param packageName first part of the output file name
 * @param bundleName  second part of the output file name
 */
extern void
generateData(const char *dataDir, const char *packageName, const char* bundleName) {
    static uint8_t sprepTrieBlock[100000];

    UNewDataMemory *pData;
    UErrorCode errorCode=U_ZERO_ERROR;
    int32_t size, dataLength;
    /*
     * The name is built as packageName + "_" + bundleName, so size the buffer
     * from both parts (+1 for the underscore, +1 for the terminating NUL).
     * NOTE(review): fileName is not released before this function returns.
     */
    char* fileName = (char*) uprv_malloc(uprv_strlen(packageName) + uprv_strlen(bundleName) + 2);

#if UCONFIG_NO_IDNA

    size=0;

#else

    int32_t sprepTrieSize;

    /* sort and add mapping data */
    storeMappingData();

    sprepTrieSize=utrie_serialize(sprepTrie, sprepTrieBlock, sizeof(sprepTrieBlock), getFoldedValue, TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "error: utrie_serialize(sprep trie) failed, %s\n", u_errorName(errorCode));
        exit(errorCode);
    }

    size = sprepTrieSize + mappingDataCapacity*U_SIZEOF_UCHAR + sizeof(indexes);
    if(beVerbose) {
        printf("size of sprep trie %5u bytes\n", sprepTrieSize);
        printf("size of " U_ICUDATA_NAME "_%s." DATA_TYPE " contents: %ld bytes\n", bundleName,(long)size);
        printf("size of mapping data array %5u bytes\n",mappingDataCapacity * U_SIZEOF_UCHAR);
        printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex);
        printf("Maximum length of the mapping string is : %i \n", maxLength);
    }

#endif

    uprv_strcpy(fileName,packageName);
    uprv_strcat(fileName,"_");
    uprv_strcat(fileName,bundleName);

    /* write the data */
    pData=udata_create(dataDir, DATA_TYPE, fileName, &dataInfo,
                       haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gensprep: unable to create the output file, error %d\n", errorCode);
        exit(errorCode);
    }

#if !UCONFIG_NO_IDNA

    indexes[_SPREP_INDEX_TRIE_SIZE]=sprepTrieSize;
    indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]=mappingDataCapacity*U_SIZEOF_UCHAR;

    udata_writeBlock(pData, indexes, sizeof(indexes));
    udata_writeBlock(pData, sprepTrieBlock, sprepTrieSize);
    udata_writeBlock(pData, mappingData, indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]);

#endif

    /* finish up */
    dataLength=udata_finish(pData, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gensprep: error %d writing the output file\n", errorCode);
        exit(errorCode);
    }

    if(dataLength!=size) {
        fprintf(stderr, "gensprep error: data length %ld != calculated size %ld\n",
                (long)dataLength, (long)size);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }

#if !UCONFIG_NO_IDNA
    /*
     * done with writing the data .. close the hashtable.
     * hashTable only exists in the !UCONFIG_NO_IDNA build and stays NULL
     * until storeMapping() creates it, hence the guards.
     */
    if(hashTable != NULL) {
        uhash_close(hashTable);
        hashTable = NULL;
    }
#endif
}
#if !UCONFIG_NO_IDNA
/*
 * Tear down the build-time trie: close it with utrie_close() and then free
 * the structure that init() obtained from uprv_malloc().
 */
extern void
cleanUpData(void) {
    void *trieStorage = sprepTrie;

    utrie_close(sprepTrie);
    uprv_free(trieStorage);
}
#endif /* #if !UCONFIG_NO_IDNA */
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/