mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-3050 StringPrep API first cut
X-SVN-Rev: 12674
This commit is contained in:
parent
1349672e9b
commit
9a043c2e5f
57 changed files with 7672 additions and 3692 deletions
8
.gitignore
vendored
8
.gitignore
vendored
|
@ -331,6 +331,14 @@ icu4c/source/tools/genrb/genjp/genjp.positions
|
|||
icu4c/source/tools/genrb/genrb
|
||||
icu4c/source/tools/genrb/genrb.[0-9]
|
||||
icu4c/source/tools/genrb/tmp
|
||||
icu4c/source/tools/gensprep/*.d
|
||||
icu4c/source/tools/gensprep/*.pdb
|
||||
icu4c/source/tools/gensprep/Debug
|
||||
icu4c/source/tools/gensprep/Makefile
|
||||
icu4c/source/tools/gensprep/Release
|
||||
icu4c/source/tools/gensprep/gensprep
|
||||
icu4c/source/tools/gensprep/gensprep.8
|
||||
icu4c/source/tools/gensprep/gensprep.plg
|
||||
icu4c/source/tools/gentest/*.d
|
||||
icu4c/source/tools/gentest/*.pdb
|
||||
icu4c/source/tools/gentest/Debug
|
||||
|
|
|
@ -96,10 +96,10 @@ Package=<4>
|
|||
Project_Dep_Name genbrk
|
||||
End Project Dependency
|
||||
Begin Project Dependency
|
||||
Project_Dep_Name genidna
|
||||
Project_Dep_Name layoutex
|
||||
End Project Dependency
|
||||
Begin Project Dependency
|
||||
Project_Dep_Name layoutex
|
||||
Project_Dep_Name gensprep
|
||||
End Project Dependency
|
||||
}}}
|
||||
|
||||
|
@ -267,24 +267,6 @@ Package=<4>
|
|||
|
||||
###############################################################################
|
||||
|
||||
Project: "genidna"=..\tools\genidna\genidna.dsp - Package Owner=<4>
|
||||
|
||||
Package=<5>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
Package=<4>
|
||||
{{{
|
||||
Begin Project Dependency
|
||||
Project_Dep_Name common
|
||||
End Project Dependency
|
||||
Begin Project Dependency
|
||||
Project_Dep_Name toolutil
|
||||
End Project Dependency
|
||||
}}}
|
||||
|
||||
###############################################################################
|
||||
|
||||
Project: "gennames"=..\tools\gennames\gennames.dsp - Package Owner=<4>
|
||||
|
||||
Package=<5>
|
||||
|
@ -381,6 +363,18 @@ Package=<4>
|
|||
|
||||
###############################################################################
|
||||
|
||||
Project: "gensprep"=..\tools\gensprep\gensprep.dsp - Package Owner=<4>
|
||||
|
||||
Package=<5>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
Package=<4>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
###############################################################################
|
||||
|
||||
Project: "gentest"=..\tools\gentest\gentest.dsp - Package Owner=<4>
|
||||
|
||||
Package=<5>
|
||||
|
@ -606,7 +600,7 @@ Package=<4>
|
|||
Project_Dep_Name genbrk
|
||||
End Project Dependency
|
||||
Begin Project Dependency
|
||||
Project_Dep_Name genidna
|
||||
Project_Dep_Name gensprep
|
||||
End Project Dependency
|
||||
}}}
|
||||
|
||||
|
|
|
@ -3302,14 +3302,6 @@ InputPath=.\unicode\utf_old.h
|
|||
# PROP Default_Filter "*.c,*.h"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\nameprep.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\nameprep.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\punycode.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
@ -3318,18 +3310,6 @@ SOURCE=.\punycode.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\sprpimpl.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\strprep.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\strprep.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\uidna.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
@ -3354,6 +3334,41 @@ InputPath=.\unicode\uidna.h
|
|||
|
||||
!ENDIF
|
||||
|
||||
# End Source File
|
||||
# End Group
|
||||
# Begin Group "sprep"
|
||||
|
||||
# PROP Default_Filter ""
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\sprpimpl.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\usprep.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\unicode\usprep.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\unicode\usprep.h
|
||||
|
||||
"..\..\include\unicode\usprep.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(InputPath) ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win64 Release"
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win64 Debug"
|
||||
|
||||
!ENDIF
|
||||
|
||||
# End Source File
|
||||
# End Group
|
||||
# End Target
|
||||
|
|
|
@ -1,38 +0,0 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: nameprep.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003feb1
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
#include "nameprep.h"
|
||||
// *****************************************************************************
|
||||
// class NamePrep
|
||||
// *****************************************************************************
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
const char NamePrep::fgClassID=0;
|
||||
|
||||
// default constructor
|
||||
NamePrep::NamePrep(UErrorCode& status){
|
||||
bidiCheck = TRUE;
|
||||
doNFKC = TRUE;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_IDNA */
|
|
@ -1,102 +0,0 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: nameprep.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003feb1
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
|
||||
#ifndef NAMEPREP_H
|
||||
#define NAMEPREP_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
#include "strprep.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
/*
|
||||
A profile of stringprep MUST include all of the following:
|
||||
|
||||
- The intended applicability of the profile
|
||||
|
||||
- The character repertoire that is the input and output to stringprep
|
||||
(which is Unicode 3.2 for this version of stringprep)
|
||||
|
||||
- The mapping tables from this document used (as described in section
|
||||
3)
|
||||
|
||||
- Any additional mapping tables specific to the profile
|
||||
|
||||
- The Unicode normalization used, if any (as described in section 4)
|
||||
|
||||
- The tables from this document of characters that are prohibited as
|
||||
output (as described in section 5)
|
||||
|
||||
- The bidirectional string testing used, if any (as described in
|
||||
section 6)
|
||||
|
||||
- Any additional characters that are prohibited as output specific to
|
||||
the profile
|
||||
*/
|
||||
|
||||
|
||||
class NamePrep: public StringPrep {
|
||||
public :
|
||||
NamePrep(UErrorCode& status);
|
||||
|
||||
virtual inline ~NamePrep(){};
|
||||
|
||||
virtual inline UBool isNotProhibited(UChar32 ch);
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for this class.
|
||||
*
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for the actual class.
|
||||
*
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
|
||||
|
||||
private:
|
||||
/**
|
||||
* The address of this static class variable serves as this class's ID
|
||||
* for ICU "poor man's RTTI".
|
||||
*/
|
||||
static const char fgClassID;
|
||||
};
|
||||
|
||||
inline UBool NamePrep::isNotProhibited(UChar32 ch){
|
||||
return (UBool)(ch == 0x0020); /* ASCII_SPACE */
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_IDNA */
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
|
@ -2367,8 +2367,8 @@ _uRegexErrorName[U_REGEX_ERROR_LIMIT - U_REGEX_ERROR_START] = {
|
|||
static const char * const
|
||||
_uIDNAErrorName[U_IDNA_ERROR_LIMIT - U_IDNA_ERROR_START] = {
|
||||
"U_IDNA_ERROR_START",
|
||||
"U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR",
|
||||
"U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR",
|
||||
"U_IDNA_PROHIBITED_ERROR",
|
||||
"U_IDNA_UNASSIGNED_ERROR",
|
||||
"U_IDNA_CHECK_BIDI_ERROR",
|
||||
"U_IDNA_STD3_ASCII_RULES_ERROR",
|
||||
"U_IDNA_ACE_PREFIX_ERROR",
|
||||
|
|
|
@ -237,6 +237,7 @@ void RBBISetBuilder::build() {
|
|||
NULL, // Data array (utrie will allocate one)
|
||||
100000, // Max Data Length
|
||||
0, // Initial value for all code points
|
||||
0, // Lead surrogate unit value
|
||||
TRUE); // Keep Latin 1 in separately
|
||||
|
||||
|
||||
|
|
|
@ -20,82 +20,136 @@
|
|||
#include "unicode/utypes.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include "unicode/usprep.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "utrie.h"
|
||||
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
enum{
|
||||
UIDNA_NO_VALUE = 0x0000 ,
|
||||
UIDNA_UNASSIGNED = 0x0001 ,
|
||||
UIDNA_PROHIBITED = 0x0002 ,
|
||||
UIDNA_MAP_NFKC = 0x0003 ,
|
||||
UIDNA_LABEL_SEPARATOR = 0x0004
|
||||
typedef enum UStringPrepType UStringPrepType;
|
||||
|
||||
#define _SPREP_DATA_TYPE "spp"
|
||||
|
||||
enum UStringPrepType{
|
||||
USPREP_UNASSIGNED = 0x0000 ,
|
||||
USPREP_MAP = 0x0001 ,
|
||||
USPREP_PROHIBITED = 0x0002 ,
|
||||
USPREP_LABEL_SEPARATOR = 0x0003 ,
|
||||
USPREP_DELETE = 0x0004 ,
|
||||
USPREP_TYPE_LIMIT = 0x0005 ,
|
||||
};
|
||||
|
||||
static const char* usprepTypeNames[] ={
|
||||
"UNASSIGNED" ,
|
||||
"MAP" ,
|
||||
"PROHIBITED" ,
|
||||
"LABEL_SEPARATOR" ,
|
||||
"DELETE",
|
||||
"TYPE_LIMIT"
|
||||
};
|
||||
enum{
|
||||
_IDNA_LENGTH_IN_MAPPING_TABLE = 0x0003 /*11*/
|
||||
_SPREP_NORMALIZATION_ON = 0x0001,
|
||||
_SPREP_CHECK_BIDI_ON = 0x0002,
|
||||
};
|
||||
|
||||
enum{
|
||||
_SPREP_TYPE_THRESHOLD = 0xFFF0,
|
||||
_SPREP_MAX_INDEX_VALUE = 0x3FBF, /*16139*/
|
||||
_SPREP_MAX_INDEX_TOP_LENGTH = 0x0003
|
||||
};
|
||||
|
||||
/* indexes[] value names */
|
||||
enum {
|
||||
_IDNA_INDEX_TRIE_SIZE, /* number of bytes in normalization trie */
|
||||
_IDNA_INDEX_MAPPING_DATA_SIZE, /* The array that contains the mapping */
|
||||
_IDNA_INDEX_TOP=3 /* changing this requires a new formatVersion */
|
||||
_SPREP_INDEX_TRIE_SIZE = 0, /* number of bytes in normalization trie */
|
||||
_SPREP_INDEX_MAPPING_DATA_SIZE = 1, /* The array that contains the mapping */
|
||||
_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION = 2, /* The index of Unicode version of last entry in NormalizationCorrections.txt */
|
||||
_SPREP_ONE_UCHAR_MAPPING_INDEX_START = 3, /* The starting index of 1 UChar mapping index in the mapping data array */
|
||||
_SPREP_TWO_UCHARS_MAPPING_INDEX_START = 4, /* The starting index of 2 UChars mapping index in the mapping data array */
|
||||
_SPREP_THREE_UCHARS_MAPPING_INDEX_START = 5,
|
||||
_SPREP_FOUR_UCHARS_MAPPING_INDEX_START = 6,
|
||||
_SPREP_OPTIONS = 7, /* Bit set of options to turn on in the profile */
|
||||
_SPREP_INDEX_TOP=16 /* changing this requires a new formatVersion */
|
||||
};
|
||||
|
||||
enum {
|
||||
_IDNA_MAPPING_DATA_SIZE = 2000,
|
||||
_IDNA_MAP_TO_NOTHING = 0x7FF
|
||||
typedef struct UStringPrepKey UStringPrepKey;
|
||||
|
||||
|
||||
struct UStringPrepKey{
|
||||
char* name;
|
||||
char* path;
|
||||
};
|
||||
|
||||
#if defined(XP_CPLUSPLUS)
|
||||
static inline
|
||||
void uprv_syntaxError(const UChar* rules,
|
||||
struct UStringPrepProfile{
|
||||
int32_t indexes[_SPREP_INDEX_TOP];
|
||||
UTrie sprepTrie;
|
||||
const uint16_t* mappingData;
|
||||
UDataMemory* sprepData;
|
||||
UBool isDataLoaded;
|
||||
int32_t refCount;
|
||||
};
|
||||
|
||||
/**
|
||||
* Helper function for populating the UParseError struct
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uprv_syntaxError(const UChar* rules,
|
||||
int32_t pos,
|
||||
int32_t rulesLen,
|
||||
UParseError* parseError)
|
||||
{
|
||||
if(parseError == NULL){
|
||||
return;
|
||||
}
|
||||
if(pos == rulesLen && rulesLen >0){
|
||||
pos--;
|
||||
}
|
||||
parseError->offset = pos;
|
||||
parseError->line = 0 ; // we are not using line numbers
|
||||
|
||||
// for pre-context
|
||||
int32_t start = (pos <=U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
|
||||
int32_t stop = pos;
|
||||
|
||||
u_memcpy(parseError->preContext,rules+start,stop-start);
|
||||
//null terminate the buffer
|
||||
parseError->preContext[stop-start] = 0;
|
||||
|
||||
//for post-context
|
||||
start = pos;
|
||||
if(start<rulesLen) {
|
||||
U16_FWD_1(rules, start, rulesLen);
|
||||
}
|
||||
UParseError* parseError);
|
||||
|
||||
|
||||
/**
|
||||
* Map every character in input stream with mapping character
|
||||
* in the mapping table and populate the output stream.
|
||||
* For any individual character the mapping table may specify
|
||||
* that a character be mapped to nothing, mapped to one
|
||||
* other character or to a string of other characters.
|
||||
*
|
||||
* @param prep Profile to use
|
||||
* @param src Pointer to UChar buffer containing a single label
|
||||
* @param srcLength Number of characters in the source label
|
||||
* @param dest Pointer to the destination buffer to receive the output
|
||||
* @param destCapacity The capacity of destination array
|
||||
* @param options
|
||||
* @param parseError
|
||||
* @param status ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return The number of UChars in the destination buffer
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
usprep_map( UStringPrepProfile* prep,
|
||||
const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
int32_t options,
|
||||
UParseError* parseError,
|
||||
UErrorCode* status );
|
||||
|
||||
|
||||
/**
|
||||
* Normalize the input stream using Normalization Form KC (NFKC)
|
||||
*
|
||||
* @param prep Profile to use
|
||||
* @param src Pointer to UChar buffer containing a single label
|
||||
* @param srcLength Number of characters in the source label
|
||||
* @param dest Pointer to the destination buffer to receive the output
|
||||
* @param destCapacity The capacity of destination array
|
||||
* @param status ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return The number of UChars in the destination buffer
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
usprep_normalize( UStringPrepProfile* prep,
|
||||
const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
UErrorCode* status );
|
||||
|
||||
|
||||
U_CFUNC UBool
|
||||
usprep_isLabelSeparator(UStringPrepProfile* profile,
|
||||
UChar32 ch, UErrorCode* status);
|
||||
|
||||
stop = ((pos+U_PARSE_CONTEXT_LEN)<= rulesLen )? (pos+(U_PARSE_CONTEXT_LEN)) :
|
||||
rulesLen;
|
||||
if(start < stop){
|
||||
u_memcpy(parseError->postContext,rules+start,stop-start);
|
||||
//null terminate the buffer
|
||||
parseError->postContext[stop-start]= 0;
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
/* error codes for prototyping
|
||||
#define U_IDNA_ERROR_START U_ERROR_LIMIT
|
||||
#define U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 1))
|
||||
#define U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 2))
|
||||
#define U_IDNA_CHECK_BIDI_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 3))
|
||||
#define U_IDNA_STD3_ASCII_RULES_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 4))
|
||||
#define U_IDNA_ACE_PREFIX_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 5))
|
||||
#define U_IDNA_VERIFICATION_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 6))
|
||||
#define U_IDNA_LABEL_TOO_LONG_ERROR ((UErrorCode)(U_IDNA_ERROR_START + 8))
|
||||
*/
|
||||
|
||||
#endif /* #if !UCONFIG_NO_IDNA */
|
||||
|
||||
|
|
|
@ -1,513 +0,0 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: strprep.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003feb1
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
#include "strprep.h"
|
||||
#include "utrie.h"
|
||||
#include "umutex.h"
|
||||
#include "cmemory.h"
|
||||
#include "sprpimpl.h"
|
||||
#include "nameprep.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/ustring.h"
|
||||
|
||||
static const uint16_t* mappingData = NULL;
|
||||
static int32_t indexes[_IDNA_INDEX_TOP]={ 0 };
|
||||
static UBool _isDataLoaded = FALSE;
|
||||
static UTrie idnTrie={ 0,0,0,0,0,0,0 };
|
||||
static UDataMemory* idnData=NULL;
|
||||
static UErrorCode dataErrorCode =U_ZERO_ERROR;
|
||||
/* file definitions */
|
||||
static const char DATA_NAME[] = "uidna";
|
||||
static const char DATA_TYPE[] = "icu";
|
||||
|
||||
U_CFUNC UBool
|
||||
ustrprep_cleanup() {
|
||||
if(idnData!=NULL) {
|
||||
udata_close(idnData);
|
||||
idnData=NULL;
|
||||
}
|
||||
dataErrorCode=U_ZERO_ERROR;
|
||||
_isDataLoaded=FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
U_CDECL_BEGIN
|
||||
static UBool U_CALLCONV
|
||||
isAcceptable(void * /* context */,
|
||||
const char * /* type */,
|
||||
const char * /* name */,
|
||||
const UDataInfo *pInfo) {
|
||||
if(
|
||||
pInfo->size>=20 &&
|
||||
pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
|
||||
pInfo->charsetFamily==U_CHARSET_FAMILY &&
|
||||
pInfo->dataFormat[0]==0x49 && /* dataFormat="IDNA" 0x49, 0x44, 0x4e, 0x41 */
|
||||
pInfo->dataFormat[1]==0x44 &&
|
||||
pInfo->dataFormat[2]==0x4e &&
|
||||
pInfo->dataFormat[3]==0x41 &&
|
||||
pInfo->formatVersion[0]==2 &&
|
||||
pInfo->formatVersion[2]==UTRIE_SHIFT &&
|
||||
pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
|
||||
) {
|
||||
return TRUE;
|
||||
} else {
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
getFoldingOffset(uint32_t data) {
|
||||
if(data&0x8000) {
|
||||
return (int32_t)(data&0x7fff);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
static UBool U_CALLCONV
|
||||
loadData(UErrorCode &errorCode) {
|
||||
/* load Unicode IDNA data from file */
|
||||
UBool isCached;
|
||||
|
||||
/* do this because double-checked locking is broken */
|
||||
umtx_lock(NULL);
|
||||
isCached=_isDataLoaded;
|
||||
umtx_unlock(NULL);
|
||||
|
||||
if(!isCached) {
|
||||
UTrie _idnTrie={ 0,0,0,0,0,0,0 };
|
||||
UDataMemory *data;
|
||||
const int32_t *p=NULL;
|
||||
const uint8_t *pb;
|
||||
|
||||
if(&errorCode==NULL || U_FAILURE(errorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* open the data outside the mutex block */
|
||||
//TODO: change the path
|
||||
data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errorCode);
|
||||
dataErrorCode=errorCode;
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return _isDataLoaded=FALSE;
|
||||
}
|
||||
|
||||
p=(const int32_t *)udata_getMemory(data);
|
||||
pb=(const uint8_t *)(p+_IDNA_INDEX_TOP);
|
||||
utrie_unserialize(&_idnTrie, pb, p[_IDNA_INDEX_TRIE_SIZE], &errorCode);
|
||||
_idnTrie.getFoldingOffset=getFoldingOffset;
|
||||
|
||||
|
||||
if(U_FAILURE(errorCode)) {
|
||||
dataErrorCode=errorCode;
|
||||
udata_close(data);
|
||||
return _isDataLoaded=FALSE;
|
||||
}
|
||||
|
||||
/* in the mutex block, set the data for this process */
|
||||
umtx_lock(NULL);
|
||||
if(idnData==NULL) {
|
||||
idnData=data;
|
||||
data=NULL;
|
||||
uprv_memcpy(&indexes, p, sizeof(indexes));
|
||||
uprv_memcpy(&idnTrie, &_idnTrie, sizeof(UTrie));
|
||||
} else {
|
||||
p=(const int32_t *)udata_getMemory(idnData);
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
/* initialize some variables */
|
||||
mappingData=(uint16_t *)((uint8_t *)(p+_IDNA_INDEX_TOP)+indexes[_IDNA_INDEX_TRIE_SIZE]);
|
||||
|
||||
_isDataLoaded = TRUE;
|
||||
|
||||
/* if a different thread set it first, then close the extra data */
|
||||
if(data!=NULL) {
|
||||
udata_close(data); /* NULL if it was set correctly */
|
||||
}
|
||||
}
|
||||
|
||||
return _isDataLoaded;
|
||||
}
|
||||
|
||||
// *****************************************************************************
|
||||
// class StringPrep
|
||||
// *****************************************************************************
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
const char StringPrep::fgClassID=0;
|
||||
|
||||
UBool StringPrep::isDataLoaded(UErrorCode& status){
|
||||
if(U_FAILURE(status)){
|
||||
return FALSE;
|
||||
}
|
||||
if(_isDataLoaded==FALSE && U_FAILURE(dataErrorCode)){
|
||||
status = dataErrorCode;
|
||||
return FALSE;
|
||||
}
|
||||
loadData(dataErrorCode);
|
||||
if(U_FAILURE(dataErrorCode)){
|
||||
status = dataErrorCode;
|
||||
return FALSE;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
StringPrep* StringPrep::createDefaultInstance(UErrorCode& status){
|
||||
StringPrep* strprep = new StringPrep();
|
||||
if(!isDataLoaded(status)){
|
||||
delete strprep;
|
||||
return NULL;
|
||||
}
|
||||
return strprep;
|
||||
}
|
||||
|
||||
StringPrep* StringPrep::createNameprepInstance(UErrorCode& status){
|
||||
StringPrep* strprep = new NamePrep(status);
|
||||
if(!isDataLoaded(status)){
|
||||
delete strprep;
|
||||
return NULL;
|
||||
}
|
||||
return strprep;
|
||||
}
|
||||
|
||||
UBool StringPrep::isNotProhibited(UChar32 /*ch*/){
|
||||
return FALSE;
|
||||
}
|
||||
UBool StringPrep::isUnassigned(UChar32 ch){
|
||||
|
||||
uint32_t result;
|
||||
UTRIE_GET16(&idnTrie,ch,result);
|
||||
return (result == UIDNA_UNASSIGNED);
|
||||
|
||||
}
|
||||
|
||||
|
||||
static inline void getValues(uint32_t result, int8_t& flag,
|
||||
int8_t& length, int32_t& index){
|
||||
/* first 3 bits contain the flag */
|
||||
flag = (int8_t) (result & 0x07);
|
||||
/* next 2 bits contain the length */
|
||||
length = (int8_t) ((result>>3) & 0x03);
|
||||
/* next 10 bits contain the index */
|
||||
index = (result>> 5);
|
||||
}
|
||||
|
||||
|
||||
int32_t StringPrep::map(const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
UBool allowUnassigned,
|
||||
UParseError* parseError,
|
||||
UErrorCode& status ){
|
||||
|
||||
uint32_t result;
|
||||
int8_t flag;
|
||||
int8_t length;
|
||||
int32_t index;
|
||||
int32_t destIndex=0;
|
||||
int32_t srcIndex=0;
|
||||
|
||||
// check error status
|
||||
if(U_FAILURE(status)){
|
||||
return 0;
|
||||
}
|
||||
|
||||
//check arguments
|
||||
if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
|
||||
status=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
if(srcLength == -1){
|
||||
srcLength = u_strlen(src);
|
||||
}
|
||||
|
||||
for(;srcIndex<srcLength;){
|
||||
UChar32 ch;
|
||||
|
||||
U16_NEXT(src,srcIndex,srcLength,ch);
|
||||
|
||||
UTRIE_GET16(&idnTrie,ch,result);
|
||||
|
||||
getValues(result,flag,length,index);
|
||||
|
||||
// check if the source codepoint is unassigned
|
||||
if(flag == UIDNA_UNASSIGNED){
|
||||
if(allowUnassigned == TRUE){
|
||||
//copy the ch to destination
|
||||
if(ch <= 0xFFFF){
|
||||
if(destIndex < destCapacity ){
|
||||
dest[destIndex] = (UChar)ch;
|
||||
}
|
||||
destIndex++;
|
||||
}else{
|
||||
if(destIndex+1 < destCapacity ){
|
||||
dest[destIndex] = U16_LEAD(ch);
|
||||
dest[destIndex+1] = U16_TRAIL(ch);
|
||||
}
|
||||
destIndex +=2;
|
||||
}
|
||||
}else{
|
||||
uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
|
||||
status = U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR;
|
||||
return 0;
|
||||
}
|
||||
}else if((flag == UIDNA_MAP_NFKC && doNFKC == TRUE) ||
|
||||
(index == _IDNA_MAP_TO_NOTHING && doNFKC == FALSE)){
|
||||
|
||||
if(length == _IDNA_LENGTH_IN_MAPPING_TABLE){
|
||||
length = (int8_t) mappingData[index++];
|
||||
}
|
||||
|
||||
for(int8_t i =0; i< length; i++){
|
||||
if(destIndex < destCapacity ){
|
||||
dest[destIndex] = mappingData[index+i];
|
||||
}
|
||||
destIndex++; /* for pre-flighting */
|
||||
}
|
||||
}else{
|
||||
//copy the source into destination
|
||||
if(ch <= 0xFFFF){
|
||||
if(destIndex < destCapacity ){
|
||||
dest[destIndex] = (UChar)ch;
|
||||
}
|
||||
destIndex++;
|
||||
}else{
|
||||
if(destIndex+1 < destCapacity ){
|
||||
dest[destIndex] = U16_LEAD(ch);
|
||||
dest[destIndex+1] = U16_TRAIL(ch);
|
||||
}
|
||||
destIndex +=2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return u_terminateUChars(dest, destCapacity, destIndex, &status);
|
||||
}
|
||||
|
||||
|
||||
int32_t StringPrep::normalize( const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
UErrorCode& status ){
|
||||
|
||||
return unorm_normalize(src,srcLength,UNORM_NFKC,UNORM_UNICODE_3_2,dest,destCapacity,&status);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
1) Map -- For each character in the input, check if it has a mapping
|
||||
and, if so, replace it with its mapping.
|
||||
|
||||
2) Normalize -- Possibly normalize the result of step 1 using Unicode
|
||||
normalization.
|
||||
|
||||
3) Prohibit -- Check for any characters that are not allowed in the
|
||||
output. If any are found, return an error.
|
||||
|
||||
4) Check bidi -- Possibly check for right-to-left characters, and if
|
||||
any are found, make sure that the whole string satisfies the
|
||||
requirements for bidirectional strings. If the string does not
|
||||
satisfy the requirements for bidirectional strings, return an
|
||||
error.
|
||||
[Unicode3.2] defines several bidirectional categories; each character
|
||||
has one bidirectional category assigned to it. For the purposes of
|
||||
the requirements below, an "RandALCat character" is a character that
|
||||
has Unicode bidirectional categories "R" or "AL"; an "LCat character"
|
||||
is a character that has Unicode bidirectional category "L". Note
|
||||
|
||||
|
||||
that there are many characters which fall in neither of the above
|
||||
definitions; Latin digits (<U+0030> through <U+0039>) are examples of
|
||||
this because they have bidirectional category "EN".
|
||||
|
||||
In any profile that specifies bidirectional character handling, all
|
||||
three of the following requirements MUST be met:
|
||||
|
||||
1) The characters in section 5.8 MUST be prohibited.
|
||||
|
||||
2) If a string contains any RandALCat character, the string MUST NOT
|
||||
contain any LCat character.
|
||||
|
||||
3) If a string contains any RandALCat character, a RandALCat
|
||||
character MUST be the first character of the string, and a
|
||||
RandALCat character MUST be the last character of the string.
|
||||
*/
|
||||
|
||||
#define MAX_STACK_BUFFER_SIZE 300
|
||||
|
||||
int32_t StringPrep::process(const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
UBool allowUnassigned,
|
||||
UParseError* parseError,
|
||||
UErrorCode& status ){
|
||||
// check error status
|
||||
if(U_FAILURE(status)){
|
||||
return 0;
|
||||
}
|
||||
|
||||
//check arguments
|
||||
if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
|
||||
status=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
|
||||
UChar *b1 = b1Stack, *b2 = b2Stack;
|
||||
int32_t b1Len, b2Len=0,
|
||||
b1Capacity = MAX_STACK_BUFFER_SIZE ,
|
||||
b2Capacity = MAX_STACK_BUFFER_SIZE;
|
||||
uint32_t result;
|
||||
int32_t b2Index = 0;
|
||||
int8_t flag;
|
||||
int8_t length;
|
||||
int32_t index;
|
||||
UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
|
||||
UBool leftToRight=FALSE, rightToLeft=FALSE;
|
||||
int32_t rtlPos =-1, ltrPos =-1;
|
||||
|
||||
b1Len = map(src,srcLength, b1, b1Capacity,allowUnassigned, parseError, status);
|
||||
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR){
|
||||
// redo processing of string
|
||||
/* we do not have enough room so grow the buffer*/
|
||||
b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
|
||||
if(b1==NULL){
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR; // reset error
|
||||
|
||||
b1Len = map(src,srcLength, b1, b1Len,allowUnassigned, parseError, status);
|
||||
|
||||
}
|
||||
|
||||
b2Len = normalize(b1,b1Len, b2,b2Capacity,status);
|
||||
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR){
|
||||
// redo processing of string
|
||||
/* we do not have enough room so grow the buffer*/
|
||||
b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
|
||||
if(b2==NULL){
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR; // reset error
|
||||
|
||||
b2Len = normalize(b2,b2Len, b2,b2Len,status);
|
||||
|
||||
}
|
||||
|
||||
if(U_FAILURE(status)){
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
UChar32 ch;
|
||||
|
||||
for(; b2Index<b2Len;){
|
||||
|
||||
ch = 0;
|
||||
|
||||
U16_NEXT(b2, b2Index, b2Len, ch);
|
||||
|
||||
UTRIE_GET16(&idnTrie,ch,result);
|
||||
|
||||
getValues(result,flag,length,index);
|
||||
|
||||
if(flag == UIDNA_PROHIBITED
|
||||
&& isNotProhibited(ch) == FALSE){
|
||||
status = U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR;
|
||||
uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
direction = u_charDirection(ch);
|
||||
if(firstCharDir == U_CHAR_DIRECTION_COUNT){
|
||||
firstCharDir = direction;
|
||||
}
|
||||
if(direction == U_LEFT_TO_RIGHT){
|
||||
leftToRight = TRUE;
|
||||
ltrPos = b2Index-1;
|
||||
}
|
||||
if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
|
||||
rightToLeft = TRUE;
|
||||
rtlPos = b2Index-1;
|
||||
}
|
||||
}
|
||||
|
||||
// satisfy 2
|
||||
if( leftToRight == TRUE && rightToLeft == TRUE){
|
||||
status = U_IDNA_CHECK_BIDI_ERROR;
|
||||
uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
//satisfy 3
|
||||
if( rightToLeft == TRUE &&
|
||||
!((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
|
||||
(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
|
||||
){
|
||||
status = U_IDNA_CHECK_BIDI_ERROR;
|
||||
uprv_syntaxError(b2, rtlPos, b2Len, parseError);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if(b2Len <= destCapacity){
|
||||
uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
|
||||
}
|
||||
|
||||
CLEANUP:
|
||||
if(b1!=b1Stack){
|
||||
uprv_free(b1);
|
||||
}
|
||||
if(b2!=b2Stack){
|
||||
uprv_free(b2);
|
||||
}
|
||||
return u_terminateUChars(dest, destCapacity, b2Len, &status);
|
||||
}
|
||||
|
||||
|
||||
UBool StringPrep::isLabelSeparator(UChar32 ch, UErrorCode& status){
|
||||
// check error status
|
||||
if(U_FAILURE(status)){
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if(isDataLoaded(status)){
|
||||
int32_t result;
|
||||
UTRIE_GET16(&idnTrie,ch, result);
|
||||
if( (result & 0x07) == UIDNA_LABEL_SEPARATOR){
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_IDNA */
|
|
@ -1,365 +0,0 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: strprep.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003feb1
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
|
||||
#ifndef STRPREP_H
|
||||
#define STRPREP_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/parseerr.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**\file
|
||||
*
|
||||
* This API implements the RFC 3454 StringPrep standard.
|
||||
*
|
||||
* The steps for preparing strings are:
|
||||
*
|
||||
* 1) Map -- For each character in the input, check if it has a mapping
|
||||
* and, if so, replace it with its mapping.
|
||||
* <ul>
|
||||
* <li>Delete certain codepoints from the input because their
|
||||
* presence or absence in the protocol identifiers should not
|
||||
* make two strings different</li>
|
||||
* <li>Case Mappings
|
||||
* <br>If Normalization is turned off
|
||||
* <br> Get mappings from case map tables
|
||||
* <br>else
|
||||
* <br> Get mappings from case map tables for normalization
|
||||
* <br> Use u_getFC_NFKC_Closure for obtaining extra mappings
|
||||
* </li>
|
||||
* </ul>
|
||||
* 2) Normalize -- Possibly normalize the result of step 1 using Unicode
|
||||
* normalization NFKC.
|
||||
*
|
||||
* 3) Prohibit -- Check for any characters that are not allowed in the
|
||||
* output. If any are found, return an error.
|
||||
*
|
||||
* 4) Check bidi -- Possibly check for right-to-left characters, and if
|
||||
* any are found, make sure that the whole string satisfies the
|
||||
* requirements for bidirectional strings. If the string does not
|
||||
* satisfy the requirements for bidirectional strings, return an
|
||||
* error.
|
||||
*
|
||||
* Some StringPrep profiles:
|
||||
* IDN: "Nameprep" http://www.ietf.org/rfc/rfc3491.txt
|
||||
* XMPP Node Identifiers: "Nodeprep" http://www.ietf.org/internet-drafts/draft-ietf-xmpp-nodeprep-01.txt
|
||||
* XMPP Resource Identifiers: "Resourceprep" http://www.ietf.org/internet-drafts/draft-ietf-xmpp-resourceprep-01.txt
|
||||
* ANONYMOUS SASL tokens: "plain" http://www.ietf.org/internet-drafts/draft-ietf-sasl-anon-00.txt
|
||||
* iSCSI http://www.ietf.org/internet-drafts/draft-ietf-ips-iscsi-string-prep-03.txt
|
||||
*/
|
||||
class StringPrep : public UObject{
|
||||
|
||||
protected:
|
||||
UVersionInfo unicodeVersion; /** The Character repertoire version of this profile */
|
||||
UBool bidiCheck; /** Option to turn BiDi checking on */
|
||||
UBool doNFKC; /** Option to turn NFKC on */
|
||||
|
||||
/**
|
||||
* Protected default constructor sub classes
|
||||
*/
|
||||
StringPrep(){};
|
||||
|
||||
public:
|
||||
/**
|
||||
* Destructor
|
||||
*/
|
||||
virtual inline ~StringPrep(){};
|
||||
|
||||
/**
|
||||
* Map every character in input stream with mapping character
|
||||
* in the mapping table and populate the output stream.
|
||||
* For any individual character the mapping table may specify
|
||||
* that that a character be mapped to nothing, mapped to one
|
||||
* other character or to a string of other characters.
|
||||
*
|
||||
* @param src Pointer to UChar buffer containing a single label
|
||||
* @param srcLength Number of characters in the source label
|
||||
* @param dest Pointer to the destination buffer to receive the output
|
||||
* @param destCapacity The capacity of destination array
|
||||
* @param allowUnassigned Unassigned values can be converted to ASCII for query operations
|
||||
* If TRUE unassigned values are treated as normal Unicode code point.
|
||||
* If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
|
||||
* @param status ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return The number of UChars in the destination buffer
|
||||
*
|
||||
*/
|
||||
virtual int32_t map(const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
UBool allowUnassigned,
|
||||
UParseError* parseError,
|
||||
UErrorCode& status );
|
||||
|
||||
/**
|
||||
* Normalize the input stream using Normalization Form KC (NFKC)
|
||||
*
|
||||
* @param src Pointer to UChar buffer containing a single label
|
||||
* @param srcLength Number of characters in the source label
|
||||
* @param dest Pointer to the destination buffer to receive the output
|
||||
* @param destCapacity The capacity of destination array
|
||||
* @param status ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return The number of UChars in the destination buffer
|
||||
*
|
||||
*
|
||||
*/
|
||||
virtual int32_t normalize( const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
UErrorCode& status );
|
||||
|
||||
|
||||
/**
|
||||
* Prepare the input stream with for use. This operation maps, normalizes(NFKC),
|
||||
* checks for prohited and BiDi characters in the order defined by RFC 3454
|
||||
*
|
||||
* @param src Pointer to UChar buffer containing a single label
|
||||
* @param srcLength Number of characters in the source label
|
||||
* @param dest Pointer to the destination buffer to receive the output
|
||||
* @param destCapacity The capacity of destination array
|
||||
* @param allowUnassigned Unassigned values can be converted to ASCII for query operations
|
||||
* If TRUE unassigned values are treated as normal Unicode code point.
|
||||
* If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code.
|
||||
* @param status ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return The number of UChars in the destination buffer
|
||||
*
|
||||
*
|
||||
*/
|
||||
virtual int32_t process(const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
UBool allowUnassigned,
|
||||
UParseError* parseError,
|
||||
UErrorCode& status );
|
||||
|
||||
/**
|
||||
* Create a profile from prebuilt default Nameprep profile conforming to
|
||||
* nameprep internet draft (http://www.ietf.org/html.charters/idn-charter.html).
|
||||
* This is a built-in/unmodifiable profile.
|
||||
*
|
||||
* @param status ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return Pointer to StringPrep object that is created. Should be deleted by
|
||||
* by caller
|
||||
*
|
||||
*
|
||||
*/
|
||||
static StringPrep* createNameprepInstance(UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Create a profile from prebuilt default StringPrep profile conforming to
|
||||
* RFC 3454 (ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt).
|
||||
* User defined profiles can be created by getting the default profile and
|
||||
* adding mappings, removing mappings, turning options ON/OFF and prohibiting
|
||||
* characters from the output.
|
||||
*
|
||||
* @param status ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return Pointer to StringPrep object that is created. Should be deleted by
|
||||
* the caller.
|
||||
*
|
||||
*
|
||||
*/
|
||||
static StringPrep* createDefaultInstance(UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Ascertain if the given code point is a Letter/Digit/Hyphen in the ASCII range
|
||||
*
|
||||
* @return TRUE is the code point is a Letter/Digit/Hyphen
|
||||
*
|
||||
*
|
||||
*/
|
||||
static inline UBool isLDHChar(UChar32 ch);
|
||||
|
||||
/**
|
||||
* Ascertain if the given code point is a label separator as specified by IDNA
|
||||
*
|
||||
* @return TRUE is the code point is a label separator
|
||||
*
|
||||
*
|
||||
*/
|
||||
virtual UBool isLabelSeparator(UChar32 ch, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Get the BiDi option of this profile
|
||||
*
|
||||
*
|
||||
*/
|
||||
inline UBool getCheckBiDi();
|
||||
|
||||
/**
|
||||
* Get the normalization (NFKC) option of this profile
|
||||
*
|
||||
* @return The normalization option
|
||||
*
|
||||
*
|
||||
*/
|
||||
inline UBool getNormalization();
|
||||
|
||||
/**
|
||||
* Get the Unicode version which this profile
|
||||
* conforms to
|
||||
*
|
||||
*
|
||||
*/
|
||||
inline void getUnicodeVersion(UVersionInfo& info);
|
||||
|
||||
private:
|
||||
// Boiler plate
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
*
|
||||
*/
|
||||
StringPrep(const StringPrep&){};
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
*
|
||||
*/
|
||||
StringPrep& operator=(const StringPrep& other) {return *this;};
|
||||
|
||||
/**
|
||||
* Return true if another object is semantically equal to this one.
|
||||
*
|
||||
* @param other the object to be compared with.
|
||||
* @return true if another object is semantically equal to this one.
|
||||
*
|
||||
*/
|
||||
UBool operator==(const StringPrep& other) const {return FALSE;};
|
||||
|
||||
/**
|
||||
* Return true if another object is semantically unequal to this one.
|
||||
*
|
||||
* @param other the object to be compared with.
|
||||
* @return true if another object is semantically unequal to this one.
|
||||
*
|
||||
*/
|
||||
UBool operator!=(const StringPrep& other) const { return !operator==(other); }
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for this class.
|
||||
*
|
||||
*
|
||||
*/
|
||||
static inline UClassID getStaticClassID();
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for the actual class.
|
||||
*
|
||||
*
|
||||
*/
|
||||
virtual inline UClassID getDynamicClassID() const;
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* Sub classes that slightly modify the default profile
|
||||
* implement this method to remove characters to
|
||||
* the prohibited list. The default implementation does not
|
||||
* check if the data is loaded or not. The caller is responsible
|
||||
* for checking for data.
|
||||
*
|
||||
*/
|
||||
virtual UBool isNotProhibited(UChar32 ch);
|
||||
|
||||
/**
|
||||
* Sub classes that slightly modify the default profile
|
||||
* implement this method to remove characters to
|
||||
* the unassigned list. The default implementation does not
|
||||
* check if the data is loaded or not. The caller is responsible
|
||||
* for checking for data.
|
||||
*/
|
||||
virtual UBool isUnassigned(UChar32 ch);
|
||||
|
||||
/**
|
||||
* Ascertains if uidna.icu data file is loaded.
|
||||
* If data is not loaded, loads the data file.
|
||||
*
|
||||
*
|
||||
*/
|
||||
static UBool isDataLoaded(UErrorCode& status);
|
||||
|
||||
private:
|
||||
|
||||
/**
|
||||
* The address of this static class variable serves as this class's ID
|
||||
* for ICU "poor man's RTTI".
|
||||
*/
|
||||
static const char fgClassID;
|
||||
|
||||
};
|
||||
|
||||
/** Returns the stored BiDi checking option of this profile. */
inline UBool StringPrep::getCheckBiDi(){
    const UBool checkBiDi = bidiCheck;
    return checkBiDi;
}
|
||||
|
||||
|
||||
/** Returns the stored normalization (NFKC) option of this profile. */
inline UBool StringPrep::getNormalization(){
    const UBool normalizeNFKC = doNFKC;
    return normalizeNFKC;
}
|
||||
|
||||
/** Copies every field of this profile's unicodeVersion into info. */
inline void StringPrep::getUnicodeVersion(UVersionInfo& info){
    // info is a reference to the array, so sizeof yields the full array size
    const int32_t fieldCount = (int32_t)(sizeof(info)/sizeof(info[0]));
    int32_t idx = 0;
    while(idx < fieldCount){
        info[idx] = unicodeVersion[idx];
        ++idx;
    }
}
|
||||
|
||||
/** Returns the address of fgClassID, which serves as the ID of this class. */
inline UClassID StringPrep::getStaticClassID() {
    const UClassID classID = (UClassID)&fgClassID;
    return classID;
}
|
||||
|
||||
/** Returns the class ID of this object by forwarding to getStaticClassID(). */
inline UClassID StringPrep::getDynamicClassID() const {
    const UClassID classID = getStaticClassID();
    return classID;
}
|
||||
|
||||
/**
 * Tests ch against the set [\u002D \u0030-\u0039 \u0041-\u005A \u0061-\u007A].
 * Returns TRUE for a member of the set and FALSE for everything else.
 */
inline UBool StringPrep::isLDHChar(UChar32 ch){
    // U+002D lies below the other ranges, so test it on its own
    if(ch == 0x002D){
        return TRUE;
    }
    // outside U+0030..U+007A nothing else can match
    if(ch < 0x0030 || ch > 0x007A){
        return FALSE;
    }
    // U+0030..U+0039
    if(ch <= 0x0039){
        return TRUE;
    }
    // U+0041..U+005A
    if(0x0041 <= ch && ch <= 0x005A){
        return TRUE;
    }
    // what remains is U+003A..U+0040 and U+005B..U+007A; only U+0061 and up match
    return (UBool)(ch >= 0x0061);
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_IDNA */
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
|
@ -58,7 +58,7 @@ u_cleanup(void)
|
|||
|
||||
}
|
||||
#if !UCONFIG_NO_IDNA
|
||||
ustrprep_cleanup();
|
||||
usprep_cleanup();
|
||||
#endif
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
breakiterator_cleanup();
|
||||
|
|
|
@ -35,7 +35,7 @@ U_CFUNC UBool uloc_cleanup(void);
|
|||
|
||||
U_CFUNC UBool breakiterator_cleanup(void);
|
||||
|
||||
U_CFUNC UBool ustrprep_cleanup(void);
|
||||
U_CFUNC UBool usprep_cleanup(void);
|
||||
|
||||
U_CFUNC UBool U_EXPORT2 ucnv_cleanup(void);
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
|
||||
#include "unicode/uidna.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "strprep.h"
|
||||
#include "unicode/usprep.h"
|
||||
#include "punycode.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "cmemory.h"
|
||||
|
@ -40,6 +40,7 @@ static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
|
|||
#define CAPITAL_Z 0x005A
|
||||
#define LOWER_CASE_DELTA 0x0020
|
||||
#define FULL_STOP 0x002E
|
||||
#define DATA_FILE_NAME "uidna"
|
||||
|
||||
inline static UChar
|
||||
toASCIILower(UChar ch){
|
||||
|
@ -119,21 +120,70 @@ compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
|
|||
return lengthResult;
|
||||
}
|
||||
|
||||
/**
 * Tests ch against the set [\u002D \u0030-\u0039 \u0041-\u005A \u0061-\u007A].
 * Returns TRUE for a member of the set and FALSE for everything else.
 */
static inline UBool
isLDHChar(UChar32 ch){
    // high runner case: nothing above U+007A can match
    if(ch > 0x007A){
        return FALSE;
    }

    const UBool inFirstRange  = (UBool)(0x0030 <= ch && ch <= 0x0039);
    const UBool inSecondRange = (UBool)(0x0041 <= ch && ch <= 0x005A);
    const UBool inThirdRange  = (UBool)(0x0061 <= ch && ch <= 0x007A);

    return (UBool)(ch == 0x002D || inFirstRange || inSecondRange || inThirdRange);
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uidna_toASCII(const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
int32_t options,
|
||||
UParseError* parseError,
|
||||
UErrorCode* status){
|
||||
|
||||
if(status == NULL || U_FAILURE(*status)){
|
||||
return 0;
|
||||
}
|
||||
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
// returns the length of the label excluding the separator
|
||||
// if *limit == separator then the length returned does not include
|
||||
// the separator.
|
||||
static inline int32_t
getNextSeparator(UChar *src,int32_t srcLength,UStringPrepProfile* nameprep,
                 UChar **limit,
                 UBool *done,
                 UErrorCode *status){
    // Scans src for the first code unit that usprep_isLabelSeparator() accepts.
    //  - separator found at index pos: *limit points just past it, *done is
    //    left untouched, and pos is returned.
    //  - no separator: *limit points at the end of the input, *done is set to
    //    TRUE, and the number of code units scanned is returned.
    int32_t pos = 0;

    if(srcLength == -1){
        // NUL-terminated input: stop at the terminator
        while(src[pos] != 0){
            if(usprep_isLabelSeparator(nameprep, src[pos], status)){
                *limit = src + (pos+1); // go past the delimiter
                return pos;
            }
            ++pos;
        }
        *limit = src + pos; // point to null
    }else{
        // counted input: a zero code unit is not treated as a terminator here
        while(pos < srcLength){
            if(usprep_isLabelSeparator(nameprep, src[pos], status)){
                *limit = src + (pos+1); // go past the delimiter
                return pos;
            }
            ++pos;
        }
        *limit = src+srcLength;
    }

    // we have not found the delimiter
    *done = TRUE;
    return pos;
}
|
||||
|
||||
static int32_t
|
||||
_internal_toASCII(const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
int32_t options,
|
||||
UStringPrepProfile* nameprep,
|
||||
UParseError* parseError,
|
||||
UErrorCode* status){
|
||||
|
||||
UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
|
||||
//initialize pointers to stack buffers
|
||||
UChar *b1 = b1Stack, *b2 = b2Stack;
|
||||
|
@ -142,7 +192,7 @@ uidna_toASCII(const UChar* src, int32_t srcLength,
|
|||
b2Capacity = MAX_LABEL_BUFFER_SIZE ,
|
||||
reqLength=0;
|
||||
|
||||
|
||||
int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
|
||||
UBool* caseFlags = NULL;
|
||||
|
||||
// the source contains all ascii codepoints
|
||||
|
@ -153,18 +203,12 @@ uidna_toASCII(const UChar* src, int32_t srcLength,
|
|||
int32_t j=0;
|
||||
|
||||
//get the options
|
||||
UBool allowUnassigned = (UBool)((options & UIDNA_ALLOW_UNASSIGNED) != 0);
|
||||
UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
|
||||
|
||||
int32_t failPos = -1;
|
||||
// step 2
|
||||
StringPrep* prep = StringPrep::createNameprepInstance(*status);
|
||||
|
||||
if(U_FAILURE(*status)){
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned, parseError, *status);
|
||||
int32_t failPos = -1;
|
||||
|
||||
// step 2
|
||||
b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
|
||||
|
||||
if(*status == U_BUFFER_OVERFLOW_ERROR){
|
||||
// redo processing of string
|
||||
|
@ -177,7 +221,7 @@ uidna_toASCII(const UChar* src, int32_t srcLength,
|
|||
|
||||
*status = U_ZERO_ERROR; // reset error
|
||||
|
||||
b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
|
||||
b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
|
||||
}
|
||||
// error bail out
|
||||
if(U_FAILURE(*status)){
|
||||
|
@ -192,7 +236,7 @@ uidna_toASCII(const UChar* src, int32_t srcLength,
|
|||
// here we do not assemble surrogates
|
||||
// since we know that LDH code points
|
||||
// are in the ASCII range only
|
||||
if(prep->isLDHChar(b1[j])==FALSE){
|
||||
if(isLDHChar(b1[j])==FALSE){
|
||||
srcIsLDH = FALSE;
|
||||
failPos = j;
|
||||
}
|
||||
|
@ -292,30 +336,20 @@ CLEANUP:
|
|||
}
|
||||
uprv_free(caseFlags);
|
||||
|
||||
delete prep;
|
||||
|
||||
return u_terminateUChars(dest, destCapacity, reqLength, status);
|
||||
}
|
||||
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uidna_toUnicode(const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
int32_t options,
|
||||
UParseError* parseError,
|
||||
UErrorCode* status){
|
||||
|
||||
if(status == NULL || U_FAILURE(*status)){
|
||||
return 0;
|
||||
}
|
||||
if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
static int32_t
|
||||
_internal_toUnicode(const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
int32_t options,
|
||||
UStringPrepProfile* nameprep,
|
||||
UParseError* parseError,
|
||||
UErrorCode* status){
|
||||
|
||||
//get the options
|
||||
UBool allowUnassigned = (UBool)((options & UIDNA_ALLOW_UNASSIGNED) != 0);
|
||||
UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
|
||||
int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
|
||||
|
||||
UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
|
||||
|
||||
|
@ -326,8 +360,7 @@ uidna_toUnicode(const UChar* src, int32_t srcLength,
|
|||
b2Capacity = MAX_LABEL_BUFFER_SIZE,
|
||||
b3Capacity = MAX_LABEL_BUFFER_SIZE,
|
||||
reqLength=0;
|
||||
|
||||
StringPrep* prep = StringPrep::createNameprepInstance(*status);
|
||||
|
||||
b1Len = 0;
|
||||
UBool* caseFlags = NULL;
|
||||
|
||||
|
@ -335,10 +368,6 @@ uidna_toUnicode(const UChar* src, int32_t srcLength,
|
|||
UBool srcIsLDH = TRUE;
|
||||
int32_t failPos =0;
|
||||
|
||||
if(U_FAILURE(*status)){
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
// step 1: find out if all the codepoints in src are ASCII
|
||||
if(srcLength==-1){
|
||||
srcLength = 0;
|
||||
|
@ -349,7 +378,7 @@ uidna_toUnicode(const UChar* src, int32_t srcLength,
|
|||
// here we do not assemble surrogates
|
||||
// since we know that LDH code points
|
||||
// are in the ASCII range only
|
||||
if(prep->isLDHChar(src[srcLength])==FALSE){
|
||||
if(isLDHChar(src[srcLength])==FALSE){
|
||||
srcIsLDH = FALSE;
|
||||
failPos = srcLength;
|
||||
}
|
||||
|
@ -363,7 +392,7 @@ uidna_toUnicode(const UChar* src, int32_t srcLength,
|
|||
// here we do not assemble surrogates
|
||||
// since we know that LDH code points
|
||||
// are in the ASCII range only
|
||||
if(prep->isLDHChar(src[j])==FALSE){
|
||||
if(isLDHChar(src[j])==FALSE){
|
||||
srcIsLDH = FALSE;
|
||||
failPos = j;
|
||||
}
|
||||
|
@ -372,7 +401,7 @@ uidna_toUnicode(const UChar* src, int32_t srcLength,
|
|||
|
||||
if(srcIsASCII == FALSE){
|
||||
// step 2: process the string
|
||||
b1Len = prep->process(src,srcLength,b1,b1Capacity,allowUnassigned, parseError, *status);
|
||||
b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
|
||||
if(*status == U_BUFFER_OVERFLOW_ERROR){
|
||||
// redo processing of string
|
||||
/* we do not have enough room so grow the buffer*/
|
||||
|
@ -384,7 +413,7 @@ uidna_toUnicode(const UChar* src, int32_t srcLength,
|
|||
|
||||
*status = U_ZERO_ERROR; // reset error
|
||||
|
||||
b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
|
||||
b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
|
||||
}
|
||||
//bail out on error
|
||||
if(U_FAILURE(*status)){
|
||||
|
@ -495,8 +524,7 @@ CLEANUP:
|
|||
}
|
||||
uprv_free(caseFlags);
|
||||
|
||||
delete prep;
|
||||
|
||||
|
||||
// The RFC states that
|
||||
// <quote>
|
||||
// ToUnicode never fails. If any step fails, then the original input
|
||||
|
@ -518,45 +546,64 @@ CLEANUP:
|
|||
return u_terminateUChars(dest, destCapacity, reqLength, status);
|
||||
}
|
||||
|
||||
// returns the length of the label excluding the separator
|
||||
// if *limit == separator then the length returned does not include
|
||||
// the separator.
|
||||
static int32_t
|
||||
getNextSeparator(UChar *src,int32_t srcLength,StringPrep* prep,
|
||||
UChar **limit,
|
||||
UBool *done,
|
||||
UErrorCode *status){
|
||||
if(srcLength == -1){
|
||||
int32_t i;
|
||||
for(i=0 ; ;i++){
|
||||
if(src[i] == 0){
|
||||
*limit = src + i; // point to null
|
||||
*done = TRUE;
|
||||
return i;
|
||||
}
|
||||
if(prep->isLabelSeparator(src[i],*status)){
|
||||
*limit = src + (i+1); // go past the delimiter
|
||||
return i;
|
||||
|
||||
}
|
||||
}
|
||||
}else{
|
||||
int32_t i;
|
||||
for(i=0;i<srcLength;i++){
|
||||
if(prep->isLabelSeparator(src[i],*status)){
|
||||
*limit = src + (i+1); // go past the delimiter
|
||||
return i;
|
||||
}
|
||||
}
|
||||
// we have not found the delimiter
|
||||
// if(i==srcLength)
|
||||
*limit = src+srcLength;
|
||||
*done = TRUE;
|
||||
|
||||
return i;
|
||||
/*
 * Public entry point: validates the arguments, opens the profile named by
 * DATA_FILE_NAME, delegates the conversion to _internal_toASCII() and closes
 * the profile again.
 * Returns 0 when status is NULL or already a failure, or when the arguments
 * are rejected with U_ILLEGAL_ARGUMENT_ERROR; returns -1 when the profile
 * cannot be opened; otherwise returns what _internal_toASCII() returns.
 */
U_CAPI int32_t U_EXPORT2
uidna_toASCII(const UChar* src, int32_t srcLength,
              UChar* dest, int32_t destCapacity,
              int32_t options,
              UParseError* parseError,
              UErrorCode* status){

    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    // a NULL destination is only acceptable together with a zero capacity
    if(src == NULL || srcLength < -1 || destCapacity < 0 || (dest == NULL && destCapacity > 0)){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UStringPrepProfile* profile = usprep_open(NULL,DATA_FILE_NAME, status);
    if(U_FAILURE(*status)){
        return -1;
    }

    const int32_t resultLength = _internal_toASCII(src, srcLength, dest, destCapacity,
                                                   options, profile, parseError, status);

    /* close the profile*/
    usprep_close(profile);

    return resultLength;
}
|
||||
|
||||
/*
 * Public entry point: validates the arguments, opens the profile named by
 * DATA_FILE_NAME, delegates the conversion to _internal_toUnicode() and
 * closes the profile again.
 * Returns 0 when status is NULL or already a failure, or when the arguments
 * are rejected with U_ILLEGAL_ARGUMENT_ERROR; returns -1 when the profile
 * cannot be opened; otherwise returns what _internal_toUnicode() returns.
 */
U_CAPI int32_t U_EXPORT2
uidna_toUnicode(const UChar* src, int32_t srcLength,
                UChar* dest, int32_t destCapacity,
                int32_t options,
                UParseError* parseError,
                UErrorCode* status){

    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    // a NULL destination is only acceptable together with a zero capacity
    if(src == NULL || srcLength < -1 || destCapacity < 0 || (dest == NULL && destCapacity > 0)){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UStringPrepProfile* profile = usprep_open(NULL, DATA_FILE_NAME, status);
    if(U_FAILURE(*status)){
        return -1;
    }

    const int32_t resultLength = _internal_toUnicode(src, srcLength, dest, destCapacity,
                                                     options, profile, parseError, status);

    /* close the profile */
    usprep_close(profile);

    return resultLength;
}
|
||||
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uidna_IDNToASCII( const UChar *src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
|
@ -574,7 +621,7 @@ uidna_IDNToASCII( const UChar *src, int32_t srcLength,
|
|||
|
||||
int32_t reqLength = 0;
|
||||
|
||||
StringPrep* prep = StringPrep::createNameprepInstance(*status);
|
||||
UStringPrepProfile* nameprep = usprep_open(NULL, DATA_FILE_NAME, status);
|
||||
|
||||
if(U_FAILURE(*status)){
|
||||
return 0;
|
||||
|
@ -592,11 +639,12 @@ uidna_IDNToASCII( const UChar *src, int32_t srcLength,
|
|||
|
||||
for(;;){
|
||||
|
||||
labelLen = getNextSeparator(labelStart,remainingLen, prep, &delimiter,&done, status);
|
||||
labelLen = getNextSeparator(labelStart,remainingLen, nameprep, &delimiter,&done, status);
|
||||
|
||||
labelReqLength = uidna_toASCII( labelStart, labelLen,
|
||||
currentDest, remainingDestCapacity,
|
||||
options, parseError, status);
|
||||
labelReqLength = _internal_toASCII( labelStart, labelLen,
|
||||
currentDest, remainingDestCapacity,
|
||||
options, nameprep,
|
||||
parseError, status);
|
||||
|
||||
if(*status == U_BUFFER_OVERFLOW_ERROR){
|
||||
|
||||
|
@ -636,7 +684,7 @@ uidna_IDNToASCII( const UChar *src, int32_t srcLength,
|
|||
|
||||
}
|
||||
|
||||
delete prep;
|
||||
usprep_close(nameprep);
|
||||
|
||||
return u_terminateUChars(dest, destCapacity, reqLength, status);
|
||||
}
|
||||
|
@ -658,7 +706,7 @@ uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
|
|||
|
||||
int32_t reqLength = 0;
|
||||
|
||||
StringPrep* prep = StringPrep::createNameprepInstance(*status);
|
||||
UStringPrepProfile* nameprep = usprep_open(NULL, DATA_FILE_NAME, status);
|
||||
|
||||
if(U_FAILURE(*status)){
|
||||
return 0;
|
||||
|
@ -676,11 +724,12 @@ uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
|
|||
|
||||
for(;;){
|
||||
|
||||
labelLen = getNextSeparator(labelStart,remainingLen, prep, &delimiter,&done, status);
|
||||
labelLen = getNextSeparator(labelStart,remainingLen, nameprep, &delimiter,&done, status);
|
||||
|
||||
labelReqLength = uidna_toUnicode(labelStart, labelLen,
|
||||
currentDest, remainingDestCapacity,
|
||||
options, parseError, status);
|
||||
labelReqLength = _internal_toUnicode(labelStart, labelLen,
|
||||
currentDest, remainingDestCapacity,
|
||||
options, nameprep,
|
||||
parseError, status);
|
||||
|
||||
if(*status == U_BUFFER_OVERFLOW_ERROR){
|
||||
|
||||
|
@ -721,7 +770,7 @@ uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
|
|||
|
||||
}
|
||||
|
||||
delete prep;
|
||||
usprep_close(nameprep);
|
||||
|
||||
return u_terminateUChars(dest, destCapacity, reqLength, status);
|
||||
}
|
||||
|
|
|
@ -44,7 +44,7 @@
|
|||
* once.
|
||||
* ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
|
||||
* ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string).
|
||||
*\end_file
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
|
@ -84,18 +84,18 @@
|
|||
* @param destCapacity Size of dest.
|
||||
* @param options A bit set of options:
|
||||
*
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_UNASSIGNED_CODE_POINT_FOUND error code.
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_UNASSIGNED_ERROR error code.
|
||||
*
|
||||
* - UIDNA_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
*
|
||||
* @param parseError Pointer to UParseError struct to receive information on position
|
||||
* of error if an error is encountered. Can be NULL.
|
||||
|
@ -129,23 +129,23 @@ uidna_toASCII(const UChar* src, int32_t srcLength,
|
|||
* @param destCapacity Size of dest.
|
||||
* @param options A bit set of options:
|
||||
*
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_UNASSIGNED_CODE_POINT_FOUND error code.
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_UNASSIGNED_ERROR error code.
|
||||
*
|
||||
* - UIDNA_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points. <b> Note: </b> This option is
|
||||
* required on toUnicode operation because the RFC mandates
|
||||
* verification of decoded ACE input by applying toASCII and comparing
|
||||
* its output with source
|
||||
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points. <b> Note: </b> This option is
|
||||
* required on toUnicode operation because the RFC mandates
|
||||
* verification of decoded ACE input by applying toASCII and comparing
|
||||
* its output with source
|
||||
*
|
||||
*
|
||||
*
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
*
|
||||
* @param parseError Pointer to UParseError struct to receive information on position
|
||||
* of error if an error is encountered. Can be NULL.
|
||||
|
@ -184,18 +184,18 @@ uidna_toUnicode(const UChar* src, int32_t srcLength,
|
|||
* @param destCapacity Size of dest.
|
||||
* @param options A bit set of options:
|
||||
*
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_UNASSIGNED_CODE_POINT_FOUND error code.
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_UNASSIGNED_CODE_POINT_FOUND error code.
|
||||
*
|
||||
* - UIDNA_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
*
|
||||
* @param parseError Pointer to UParseError struct to receive information on position
|
||||
* of error if an error is encountered. Can be NULL.
|
||||
|
@ -230,18 +230,18 @@ uidna_IDNToASCII( const UChar* src, int32_t srcLength,
|
|||
* @param destCapacity Size of dest.
|
||||
* @param options A bit set of options:
|
||||
*
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_UNASSIGNED_CODE_POINT_FOUND error code.
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_UNASSIGNED_CODE_POINT_FOUND error code.
|
||||
*
|
||||
* - UIDNA_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
*
|
||||
* @param parseError Pointer to UParseError struct to receive information on position
|
||||
* of error if an error is encountered. Can be NULL.
|
||||
|
@ -277,18 +277,18 @@ uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
|
|||
* @param length2 Length of second source string, or -1 if NUL-terminated.
|
||||
* @param options A bit set of options:
|
||||
*
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_UNASSIGNED_CODE_POINT_FOUND error code.
|
||||
* - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_UNASSIGNED_CODE_POINT_FOUND error code.
|
||||
*
|
||||
* - UIDNA_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
* - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points are in the input
|
||||
* are treated as normal Unicode code points.
|
||||
*
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
* - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
|
||||
* If this option is set and the input does not satisfy STD3 rules,
|
||||
* the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
|
||||
*
|
||||
* @param status ICU error code in/out parameter.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
|
|
121
icu4c/source/common/unicode/usprep.h
Normal file
121
icu4c/source/common/unicode/usprep.h
Normal file
|
@ -0,0 +1,121 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: usprep.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003jul2
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
|
||||
#ifndef __USPREP_H__
|
||||
#define __USPREP_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
#include "unicode/parseerr.h"
|
||||
|
||||
typedef struct UStringPrepProfile UStringPrepProfile;
|
||||
|
||||
|
||||
/**
|
||||
* Option to prohibit processing of unassigned codepoints in the input
|
||||
*
|
||||
* @see usprep_prepare
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
#define USPREP_NONE 0x0000
|
||||
|
||||
/**
|
||||
* Option to allow processing of unassigned codepoints in the input
|
||||
*
|
||||
* @see usprep_prepare
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
#define USPREP_ALLOW_UNASSIGNED 0x0001
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Creates a StringPrep profile from the data file.
|
||||
*
|
||||
* @param path string containing the full path pointing to the directory
|
||||
* where the resources reside followed by the package name
|
||||
* e.g. "/usr/resource/my_app/resources/guimessages" on a Unix system.
|
||||
* if NULL, ICU default data files will be used.
|
||||
* @param fileName name of the profile file to be opened
|
||||
* @param status ICU error code in/out parameter. Must not be NULL.
|
||||
* Must fulfill U_SUCCESS before the function call.
|
||||
* @return Pointer to UStringPrepProfile that is opened. Should be closed by
|
||||
* calling usprep_close()
|
||||
* @see usprep_close()
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
U_CAPI UStringPrepProfile* U_EXPORT2
|
||||
usprep_open(const char* path,
|
||||
const char* fileName,
|
||||
UErrorCode* status);
|
||||
|
||||
|
||||
/**
|
||||
* Closes the profile
|
||||
* @param profile The profile to close
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
usprep_close(UStringPrepProfile* profile);
|
||||
|
||||
|
||||
/**
|
||||
* Prepare the input stream for use. This operation maps, normalizes(NFKC),
|
||||
* checks for prohibited and BiDi characters in the order defined by RFC 3454
|
||||
* depending on the options specified
|
||||
*
|
||||
* @param prep The profile to use
|
||||
* @param src Pointer to UChar buffer containing the string to prepare
|
||||
* @param srcLength Number of characters in the source string
|
||||
* @param dest Pointer to the destination buffer to receive the output
|
||||
* @param destCapacity The capacity of destination array
|
||||
* @param options A bit set of options:
|
||||
*
|
||||
* - USPREP_NONE Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
* If unassigned code points are found the operation fails with
|
||||
* U_STRINGPREP_UNASSIGNED_ERROR error code.
|
||||
*
|
||||
* - USPREP_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
|
||||
* If this option is set, the unassigned code points in the input
|
||||
* are treated as normal Unicode code points.
|
||||
* @param parseError Pointer to UParseError struct to receive information on position
|
||||
* of error if an error is encountered. Can be NULL.
|
||||
* @param status ICU in/out error code parameter.
|
||||
* U_INVALID_CHAR_FOUND if src contains
|
||||
* unmatched single surrogates.
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR if src contains
|
||||
* too many code points.
|
||||
* U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
|
||||
* @return The number of UChars in the destination buffer
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
usprep_prepare( const UStringPrepProfile* prep,
|
||||
const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
int32_t options,
|
||||
UParseError* parseError,
|
||||
UErrorCode* status );
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_IDNA */
|
||||
|
||||
#endif
|
|
@ -650,16 +650,23 @@ typedef enum UErrorCode {
|
|||
* The error code in the range 0x10400-0x104ff are reserved for IDNA related error codes
|
||||
*/
|
||||
U_IDNA_ERROR_START=0x10400,
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR,
|
||||
U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR,
|
||||
U_IDNA_PROHIBITED_ERROR,
|
||||
U_IDNA_UNASSIGNED_ERROR,
|
||||
U_IDNA_CHECK_BIDI_ERROR,
|
||||
U_IDNA_STD3_ASCII_RULES_ERROR,
|
||||
U_IDNA_ACE_PREFIX_ERROR,
|
||||
U_IDNA_VERIFICATION_ERROR,
|
||||
U_IDNA_LABEL_TOO_LONG_ERROR,
|
||||
U_IDNA_ERROR_LIMIT,
|
||||
/*
|
||||
* Aliases for StringPrep
|
||||
*/
|
||||
U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
|
||||
U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
|
||||
U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
|
||||
|
||||
|
||||
U_ERROR_LIMIT=U_IDNA_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
|
||||
U_ERROR_LIMIT=U_IDNA_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
|
||||
} UErrorCode;
|
||||
|
||||
/* Use the following to determine if an UErrorCode represents */
|
||||
|
|
|
@ -26,6 +26,9 @@
|
|||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
#ifdef DEBUG
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
/**
|
||||
* Unicode property names and property value names are compared
|
||||
* "loosely". Property[Value]Aliases.txt say:
|
||||
|
@ -429,6 +432,20 @@ u_getIntPropertyMaxValue(UProperty which) {
|
|||
* Do not use a UnicodeSet pattern because that causes infinite recursion;
|
||||
* UnicodeSet depends on the inclusions set.
|
||||
*/
|
||||
#ifdef DEBUG
|
||||
/**
 * Finds the last occurrence of a character within the first sourceLen
 * characters of source.
 *
 * @param source    Buffer to search; must hold at least sourceLen chars.
 * @param sourceLen Number of characters to examine (may be 0).
 * @param find      Character to look for.
 * @return Zero-based offset of the last match, or (uint32_t)-1 when the
 *         character does not occur. Callers detect "not found" by checking
 *         that the result is larger than the length they passed in.
 */
static uint32_t
strrch(const char* source,uint32_t sourceLen,char find){
    /*
     * Walk offsets instead of pointers: the unsigned expression
     * (sourceLen-1) wraps for sourceLen==0, and a pointer before the
     * start of the buffer must never be formed.
     */
    uint32_t offset = sourceLen;
    while(offset > 0){
        --offset;
        if(source[offset]==find){
            return offset;
        }
    }
    return (uint32_t)-1;
}
|
||||
#endif
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uprv_getInclusions(USet* set, UErrorCode *pErrorCode) {
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
|
@ -441,4 +458,64 @@ uprv_getInclusions(USet* set, UErrorCode *pErrorCode) {
|
|||
unorm_addPropertyStarts(set, pErrorCode);
|
||||
#endif
|
||||
uchar_addPropertyStarts(set, pErrorCode);
|
||||
|
||||
#ifdef DEBUG
|
||||
{
|
||||
UChar* result=NULL;
|
||||
int32_t resultCapacity=0;
|
||||
int32_t bufLen = uset_toPattern(set,result,resultCapacity,TRUE,pErrorCode);
|
||||
char* resultChars = NULL;
|
||||
if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR){
|
||||
uint32_t len = 0, add=0;
|
||||
char *buf=NULL, *current = NULL;
|
||||
*pErrorCode = U_ZERO_ERROR;
|
||||
resultCapacity = bufLen;
|
||||
result = (UChar*) uprv_malloc(resultCapacity * U_SIZEOF_UCHAR);
|
||||
bufLen = uset_toPattern(set,result,resultCapacity,TRUE,pErrorCode);
|
||||
resultChars = (char*) uprv_malloc(len+1);
|
||||
u_UCharsToChars(result,resultChars,bufLen);
|
||||
resultChars[bufLen] = 0;
|
||||
buf = resultChars;
|
||||
/*printf(resultChars);*/
|
||||
while(len < bufLen){
|
||||
add = 70-5/* for ", +\n */;
|
||||
current = buf +len;
|
||||
if (add < (bufLen-len)) {
|
||||
uint32_t index = strrch(current,add,'\\');
|
||||
if (index > add) {
|
||||
index = add;
|
||||
} else {
|
||||
int32_t num =index-1;
|
||||
uint32_t seqLen;
|
||||
while(num>0){
|
||||
if(current[num]=='\\'){
|
||||
num--;
|
||||
}else{
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ((index-num)%2==0) {
|
||||
index--;
|
||||
}
|
||||
seqLen = (current[index+1]=='u') ? 6 : 2;
|
||||
if ((add-index) < seqLen) {
|
||||
add = index + seqLen;
|
||||
}
|
||||
}
|
||||
}
|
||||
fwrite("\"",1,1,stdout);
|
||||
if(len+add<bufLen){
|
||||
fwrite(current,1,add,stdout);
|
||||
fwrite("\" +\n",1,4,stdout);
|
||||
}else{
|
||||
fwrite(current,1,bufLen-len,stdout);
|
||||
}
|
||||
len+=add;
|
||||
}
|
||||
|
||||
}
|
||||
uprv_free(result);
|
||||
uprv_free(resultChars);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
808
icu4c/source/common/usprep.cpp
Normal file
808
icu4c/source/common/usprep.cpp
Normal file
|
@ -0,0 +1,808 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: usprep.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003jul2
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
#include "unicode/usprep.h"
|
||||
|
||||
#include "unicode/unorm.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uversion.h"
|
||||
#include "umutex.h"
|
||||
#include "cmemory.h"
|
||||
#include "sprpimpl.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "uhash.h"
|
||||
#include "cstring.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/*
|
||||
Static cache for already opened StringPrep profiles
|
||||
*/
|
||||
static UHashtable *SHARED_DATA_HASHTABLE = NULL;
|
||||
|
||||
static UMTX usprepMutex = NULL;
|
||||
|
||||
|
||||
/*
 * Acceptance callback handed to udata_openChoice(): returns TRUE only when
 * the data header describes StringPrep ("SPRP") data that matches this
 * platform and the trie layout this code was compiled with.
 */
static UBool U_CALLCONV
isAcceptable(void * /* context */,
             const char * /* type */,
             const char * /* name */,
             const UDataInfo *pInfo) {
    /* reject headers smaller than 20 bytes */
    if(pInfo->size<20) {
        return FALSE;
    }
    /* endianness and charset family must match the running platform */
    if(pInfo->isBigEndian!=U_IS_BIG_ENDIAN ||
       pInfo->charsetFamily!=U_CHARSET_FAMILY) {
        return FALSE;
    }
    /* dataFormat="SPRP" */
    if(pInfo->dataFormat[0]!=0x53 ||
       pInfo->dataFormat[1]!=0x50 ||
       pInfo->dataFormat[2]!=0x52 ||
       pInfo->dataFormat[3]!=0x50) {
        return FALSE;
    }
    /* format version 3, built with the same trie shift constants */
    if(pInfo->formatVersion[0]!=3 ||
       pInfo->formatVersion[2]!=UTRIE_SHIFT ||
       pInfo->formatVersion[3]!=UTRIE_INDEX_SHIFT) {
        return FALSE;
    }
    return TRUE;
}
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
getFoldingOffset(uint32_t data) {
|
||||
|
||||
return (int32_t)data;
|
||||
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
|
||||
/* hashes an entry */
|
||||
static int32_t U_EXPORT2 U_CALLCONV
|
||||
hashEntry(const UHashTok parm) {
|
||||
UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
|
||||
UHashTok namekey, pathkey;
|
||||
namekey.pointer = b->name;
|
||||
pathkey.pointer = b->path;
|
||||
return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
|
||||
}
|
||||
|
||||
/* compares two entries */
|
||||
static UBool U_EXPORT2 U_CALLCONV
|
||||
compareEntries(const UHashTok p1, const UHashTok p2) {
|
||||
UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
|
||||
UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
|
||||
UHashTok name1, name2, path1, path2;
|
||||
name1.pointer = b1->name;
|
||||
name2.pointer = b2->name;
|
||||
path1.pointer = b1->path;
|
||||
path2.pointer = b2->path;
|
||||
return ((UBool)(uhash_compareChars(name1, name2) &
|
||||
uhash_compareChars(path1, path2)));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * Initializes the mutex that guards the StringPrep profile cache.
 * The status argument is accepted but not used by this function.
 */
U_CFUNC void
usprep_init(UErrorCode *status) {
    (void)status;
    umtx_init(&usprepMutex);
}
|
||||
|
||||
/** Initializes the cache for resources */
|
||||
static void
|
||||
initCache(UErrorCode *status) {
|
||||
UBool makeCache = FALSE;
|
||||
umtx_lock(&usprepMutex);
|
||||
makeCache = (SHARED_DATA_HASHTABLE == NULL);
|
||||
umtx_unlock(&usprepMutex);
|
||||
if(makeCache) {
|
||||
UHashtable *newCache = uhash_open(hashEntry, compareEntries, status);
|
||||
if (U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
umtx_lock(&usprepMutex);
|
||||
if(SHARED_DATA_HASHTABLE == NULL) {
|
||||
SHARED_DATA_HASHTABLE = newCache;
|
||||
newCache = NULL;
|
||||
}
|
||||
umtx_unlock(&usprepMutex);
|
||||
if(newCache != NULL) {
|
||||
uhash_close(newCache);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Loads the StringPrep data item path/name.type and attaches it to profile.
 *
 * @param profile   Profile to fill in; its sprepData member must be NULL
 *                  for the newly opened data to be installed.
 * @param path      Package path handed to udata_openChoice(); may be NULL.
 * @param name      Name of the data item.
 * @param type      Type (file extension) of the data item.
 * @param errorCode In/out error code; nothing is done if it is NULL or
 *                  already indicates a failure.
 * @return TRUE if the data was loaded and passed the version check,
 *         FALSE otherwise. On FALSE no mapped data stays attached to a
 *         profile that this call populated.
 */
static UBool U_CALLCONV
loadData(UStringPrepProfile* profile,
         const char* path,
         const char* name,
         const char* type,
         UErrorCode* errorCode) {
    /* load Unicode SPREP data from file */
    UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
    UDataMemory *dataMemory;
    const int32_t *p=NULL;
    const uint8_t *pb;
    UVersionInfo unicodeVersion;
    int32_t normVer, uniVer;

    if(errorCode==NULL || U_FAILURE(*errorCode)) {
        return FALSE;
    }

    /* open the data outside the mutex block */
    //TODO: change the path
    dataMemory=udata_openChoice(path, type, name, isAcceptable, NULL, errorCode);
    if(U_FAILURE(*errorCode)) {
        return FALSE;
    }

    /* the item starts with the int32_t index array, followed by the trie */
    p=(const int32_t *)udata_getMemory(dataMemory);
    pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
    utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
    _sprepTrie.getFoldingOffset=getFoldingOffset;

    if(U_FAILURE(*errorCode)) {
        udata_close(dataMemory);
        return FALSE;
    }

    /* in the mutex block, set the data for this process */
    umtx_lock(&usprepMutex);
    if(profile->sprepData==NULL) {
        profile->sprepData=dataMemory;
        dataMemory=NULL;    /* NULL from here on means "installed in profile" */
        uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
        uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
    } else {
        p=(const int32_t *)udata_getMemory(profile->sprepData);
    }
    umtx_unlock(&usprepMutex);

    /* the mapping table follows the serialized trie */
    profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);

    /*
     * check the normalization corrections version and the current Unicode version
     * supported by ICU
     */
    u_versionFromString(unicodeVersion, U_UNICODE_VERSION);
    normVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
    uniVer  = (unicodeVersion[0] << 24) + (unicodeVersion[1] << 16) +
              (unicodeVersion[2] << 8 ) + (unicodeVersion[3]);

    if( normVer < uniVer &&
        ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
      ){
        *errorCode = U_INVALID_FORMAT_ERROR;
        if(dataMemory!=NULL) {
            /* our mapping was never installed: just drop it */
            udata_close(dataMemory);
        } else {
            /*
             * We installed the mapping above. Release it and reset the
             * pointers so that the caller is not left with mapped data it
             * does not know about, or with a dangling mappingData pointer.
             */
            udata_close(profile->sprepData);
            profile->sprepData=NULL;
            profile->mappingData=NULL;
        }
        return FALSE;
    }
    profile->isDataLoaded = TRUE;

    /* if a different thread set it first, then close the extra data */
    if(dataMemory!=NULL) {
        udata_close(dataMemory); /* NULL if it was set correctly */
    }

    return profile->isDataLoaded;
}
|
||||
|
||||
static UStringPrepProfile*
|
||||
usprep_getProfile(const char* path,
|
||||
const char* name,
|
||||
UErrorCode *status){
|
||||
|
||||
UStringPrepProfile* profile = NULL;
|
||||
|
||||
initCache(status);
|
||||
|
||||
if(U_FAILURE(*status)){
|
||||
return NULL;
|
||||
}
|
||||
|
||||
UStringPrepKey stackKey;
|
||||
/*
|
||||
* const is cast way to save malloc, strcpy and free calls
|
||||
* we use the passed in pointers for fetching the data from the
|
||||
* hash table which is safe
|
||||
*/
|
||||
stackKey.name = (char*) name;
|
||||
stackKey.path = (char*) path;
|
||||
|
||||
/* fetch the data from the cache */
|
||||
profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
|
||||
|
||||
if(profile == NULL){
|
||||
UStringPrepKey* key = (UStringPrepKey*) uprv_malloc(sizeof(UStringPrepKey));
|
||||
if(key == NULL){
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
/* else load the data and put the data in the cache */
|
||||
profile = (UStringPrepProfile*) uprv_malloc(sizeof(UStringPrepProfile));
|
||||
if(profile == NULL){
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
uprv_free(key);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* initialize the data struct members */
|
||||
uprv_memset(profile->indexes,0,sizeof(profile->indexes));
|
||||
profile->mappingData = NULL;
|
||||
profile->sprepData = NULL;
|
||||
profile->refCount = 0;
|
||||
|
||||
/* initialize the key memebers */
|
||||
key->name = (char*) uprv_malloc(strlen(name)+1);
|
||||
if(key->name == NULL){
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
uprv_free(key);
|
||||
uprv_free(profile);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
uprv_strcpy(key->name, name);
|
||||
|
||||
key->path=NULL;
|
||||
|
||||
if(path != NULL){
|
||||
key->path = (char*) uprv_malloc(strlen(path)+1);
|
||||
if(key->path == NULL){
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
uprv_free(key->path);
|
||||
uprv_free(key);
|
||||
uprv_free(profile);
|
||||
return NULL;
|
||||
}
|
||||
uprv_strcpy(key->path, path);
|
||||
}
|
||||
|
||||
/* load the data */
|
||||
if(!loadData(profile, path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
|
||||
return NULL;
|
||||
}
|
||||
umtx_lock(&usprepMutex);
|
||||
/* add the data object to the cache */
|
||||
uhash_put(SHARED_DATA_HASHTABLE, key, profile, status);
|
||||
umtx_unlock(&usprepMutex);
|
||||
}
|
||||
umtx_lock(&usprepMutex);
|
||||
/* increment the refcount */
|
||||
profile->refCount++;
|
||||
umtx_unlock(&usprepMutex);
|
||||
|
||||
return profile;
|
||||
}
|
||||
|
||||
/*
 * Public entry point: opens (or fetches from the cache) the StringPrep
 * profile identified by path and name. Returns NULL without doing anything
 * when status is NULL or already indicates a failure.
 */
U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char* path,
            const char* name,
            UErrorCode* status){

    UStringPrepProfile* opened = NULL;

    if(status != NULL && !U_FAILURE(*status)){
        /* the cache lookup/creation does all the work */
        opened = usprep_getProfile(path,name,status);
    }

    return opened;
}
|
||||
|
||||
/*
 * Releases one reference to a profile. The profile itself stays in the
 * cache; only its reference count is lowered (never below zero).
 * A NULL profile is ignored.
 */
U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile* profile){
    if(profile!=NULL){
        umtx_lock(&usprepMutex);
        if(profile->refCount > 0){
            /* decrement the ref count */
            --profile->refCount;
        }
        umtx_unlock(&usprepMutex);
    }
}
|
||||
|
||||
static void
|
||||
usprep_unload(UStringPrepProfile* data){
|
||||
udata_close(data->sprepData);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Removes profiles from the shared cache and frees them.
 *
 * @param noRefCount TRUE removes every entry regardless of its reference
 *                   count; FALSE removes only entries whose reference
 *                   count is zero.
 * @return Number of entries removed; 0 if the cache was never created.
 */
static int32_t
usprep_internal_flushCache(UBool noRefCount){
    int32_t removed = 0;
    int32_t iterPos = -1;
    const UHashElement *element;

    umtx_lock(&usprepMutex);

    /* nothing to do if the shared table hasn't been lazily created yet */
    if (SHARED_DATA_HASHTABLE == NULL) {
        umtx_unlock(&usprepMutex);
        return 0;
    }

    /* visit every element in the table */
    for(element = uhash_nextElement(SHARED_DATA_HASHTABLE, &iterPos);
        element != NULL;
        element = uhash_nextElement(SHARED_DATA_HASHTABLE, &iterPos))
    {
        UStringPrepProfile *cachedProfile = (UStringPrepProfile *) element->value.pointer;
        UStringPrepKey     *cachedKey     = (UStringPrepKey *) element->key.pointer;

        const UBool removable = (UBool)(noRefCount == TRUE ||
                                        (noRefCount == FALSE && cachedProfile->refCount == 0));
        if(!removable) {
            continue;
        }

        removed++;
        uhash_removeElement(SHARED_DATA_HASHTABLE, element);

        /* unload the data */
        usprep_unload(cachedProfile);

        /* release the strings owned by the key, then the structs themselves */
        if(cachedKey->name != NULL) {
            uprv_free(cachedKey->name);
            cachedKey->name=NULL;
        }
        if(cachedKey->path != NULL) {
            uprv_free(cachedKey->path);
            cachedKey->path=NULL;
        }
        uprv_free(cachedProfile);
        uprv_free(cachedKey);
    }

    umtx_unlock(&usprepMutex);

    return removed;
}
|
||||
|
||||
/* Works just like ucnv_flushCache() */
|
||||
static int32_t
|
||||
usprep_flushCache(){
|
||||
return usprep_internal_flushCache(FALSE);
|
||||
}
|
||||
|
||||
/*
 * Library cleanup hook: empties the profile cache regardless of reference
 * counts, closes the hash table once it is empty, and destroys the mutex.
 *
 * @return TRUE if the shared hash table is gone afterwards.
 */
U_CFUNC UBool
usprep_cleanup(void){
    if (SHARED_DATA_HASHTABLE != NULL) {
        usprep_internal_flushCache(TRUE);

        const UBool emptied = (UBool)(SHARED_DATA_HASHTABLE != NULL &&
                                      uhash_count(SHARED_DATA_HASHTABLE) == 0);
        if (emptied) {
            uhash_close(SHARED_DATA_HASHTABLE);
            SHARED_DATA_HASHTABLE = NULL;
        }
    }

    /*
     * Don't worry about destroying the mutex even if the hash table still
     * exists. The mutex will lazily re-init itself if needed.
     */
    umtx_destroy(&usprepMutex);

    return (SHARED_DATA_HASHTABLE == NULL);
}
|
||||
|
||||
/*
 * Records the position of an error in a UParseError, together with the
 * text immediately before (preContext) and after (postContext) it.
 *
 * @param rules      The text in which the error was found.
 * @param pos        Offset of the error in rules. If it equals rulesLen
 *                   (and rulesLen > 0) the last code unit is reported.
 * @param rulesLen   Length of rules in UChars.
 * @param parseError Receives offset and context; nothing is done if NULL.
 *                   line is always set to 0. Both context buffers are
 *                   always NUL-terminated on return.
 */
U_CFUNC void
uprv_syntaxError(const UChar* rules,
                 int32_t pos,
                 int32_t rulesLen,
                 UParseError* parseError){
    if(parseError == NULL){
        return;
    }
    if(pos == rulesLen && rulesLen >0){
        pos--;
    }
    parseError->offset = pos;
    parseError->line = 0 ; // we are not using line numbers

    // for pre-context: copy at most U_PARSE_CONTEXT_LEN-1 UChars so that the
    // terminator still fits into the U_PARSE_CONTEXT_LEN-sized buffer
    int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
    int32_t stop  = pos;

    u_memcpy(parseError->preContext,rules+start,stop-start);
    //null terminate the buffer
    parseError->preContext[stop-start] = 0;

    //for post-context: skip the offending code point itself
    start = pos;
    if(start<rulesLen) {
        U16_FWD_1(rules, start, rulesLen);
    }

    stop  = ((pos+U_PARSE_CONTEXT_LEN)<= rulesLen )? (pos+(U_PARSE_CONTEXT_LEN)) :
                                                     rulesLen;
    if(start < stop){
        u_memcpy(parseError->postContext,rules+start,stop-start);
        //null terminate the buffer
        parseError->postContext[stop-start]= 0;
    }else{
        // nothing follows the error position: report an empty post-context
        // instead of leaving the buffer uninitialized
        parseError->postContext[0] = 0;
    }
}
|
||||
|
||||
|
||||
/*
 * Decodes a 16-bit word read from the StringPrep trie.
 *
 * @param trieWord The trie value for a code point.
 * @param value    Set only for mapped/deleted code points: the word
 *                 shifted right by two bits (a mapping-table index, or a
 *                 signed delta), or 0 for a deletion.
 * @param isIndex  Set only for mapped/deleted code points: TRUE when value
 *                 is an index into the mapping table, FALSE when it is a
 *                 delta (or for a deletion).
 * @return The type encoded in the word. USPREP_TYPE_LIMIT is returned for
 *         the initial value 0, meaning the code point is copied unchanged.
 */
static inline UStringPrepType
getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){

    if(trieWord == 0){
        /*
         * Initial value stored in the mapping table
         * just return USPREP_TYPE_LIMIT .. so that
         * the source codepoint is copied to the destination
         */
        return USPREP_TYPE_LIMIT;
    }

    if(trieWord >= _SPREP_TYPE_THRESHOLD){
        /* words at or above the threshold carry the type directly */
        return (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
    }

    if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
        /* the maximum index value is reserved to mark a deletion */
        isIndex = FALSE;
        value = 0;
        return USPREP_DELETE;
    }

    /* everything else is a mapping; bit 1 tells index from delta */
    if(trieWord & 0x02){
        isIndex = TRUE;
        value = trieWord >> 2; //mask off the lower 2 bits and shift
    }else{
        isIndex = FALSE;
        /* reinterpret as signed first so that the shift keeps the sign */
        value = (int16_t)trieWord;
        value = (value >> 2);
    }
    return USPREP_MAP;
}
|
||||
|
||||
|
||||
|
||||
static int32_t
|
||||
usprep_map( const UStringPrepProfile* profile,
|
||||
const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
int32_t options,
|
||||
UParseError* parseError,
|
||||
UErrorCode* status ){
|
||||
|
||||
uint16_t result;
|
||||
int32_t destIndex=0;
|
||||
int32_t srcIndex;
|
||||
UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
|
||||
UStringPrepType type;
|
||||
int16_t value;
|
||||
UBool isIndex;
|
||||
int32_t* indexes = (int32_t*)profile->indexes;
|
||||
|
||||
// no error checking the caller check for error and arguments
|
||||
// no string length check the caller finds out the string length
|
||||
|
||||
for(srcIndex=0;srcIndex<srcLength;){
|
||||
UChar32 ch;
|
||||
|
||||
U16_NEXT(src,srcIndex,srcLength,ch);
|
||||
|
||||
result=0;
|
||||
|
||||
UTRIE_GET16(&profile->sprepTrie,ch,result);
|
||||
|
||||
type = getValues(result, value, isIndex);
|
||||
|
||||
// check if the source codepoint is unassigned
|
||||
if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
|
||||
|
||||
uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
|
||||
*status = U_STRINGPREP_UNASSIGNED_ERROR;
|
||||
return 0;
|
||||
|
||||
}else if(type == USPREP_MAP){
|
||||
|
||||
int32_t index, length;
|
||||
|
||||
if(isIndex){
|
||||
index = value;
|
||||
if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
|
||||
index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
|
||||
length = 1;
|
||||
}else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
|
||||
index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
|
||||
length = 2;
|
||||
}else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
|
||||
index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
|
||||
length = 3;
|
||||
}else{
|
||||
length = profile->mappingData[index++];
|
||||
|
||||
}
|
||||
|
||||
/* copy mapping to destination */
|
||||
for(int32_t i=0; i< length; i++){
|
||||
if(destIndex < destCapacity ){
|
||||
dest[destIndex] = profile->mappingData[index+i];
|
||||
}
|
||||
destIndex++; /* for pre-flighting */
|
||||
}
|
||||
continue;
|
||||
}else{
|
||||
// subtract the delta to arrive at the code point
|
||||
ch -= value;
|
||||
}
|
||||
|
||||
}else if(type==USPREP_DELETE){
|
||||
// just consume the codepoint and contine
|
||||
continue;
|
||||
}
|
||||
//copy the code point into destination
|
||||
if(ch <= 0xFFFF){
|
||||
if(destIndex < destCapacity ){
|
||||
dest[destIndex] = (UChar)ch;
|
||||
}
|
||||
destIndex++;
|
||||
}else{
|
||||
if(destIndex+1 < destCapacity ){
|
||||
dest[destIndex] = U16_LEAD(ch);
|
||||
dest[destIndex+1] = U16_TRAIL(ch);
|
||||
}
|
||||
destIndex +=2;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return u_terminateUChars(dest, destCapacity, destIndex, status);
|
||||
}
|
||||
|
||||
|
||||
static int32_t
|
||||
usprep_normalize( const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
UErrorCode* status ){
|
||||
|
||||
return unorm_normalize(src,srcLength,UNORM_NFKC,UNORM_UNICODE_3_2,dest,destCapacity,status);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
1) Map -- For each character in the input, check if it has a mapping
|
||||
and, if so, replace it with its mapping.
|
||||
|
||||
2) Normalize -- Possibly normalize the result of step 1 using Unicode
|
||||
normalization.
|
||||
|
||||
3) Prohibit -- Check for any characters that are not allowed in the
|
||||
output. If any are found, return an error.
|
||||
|
||||
4) Check bidi -- Possibly check for right-to-left characters, and if
|
||||
any are found, make sure that the whole string satisfies the
|
||||
requirements for bidirectional strings. If the string does not
|
||||
satisfy the requirements for bidirectional strings, return an
|
||||
error.
|
||||
[Unicode3.2] defines several bidirectional categories; each character
|
||||
has one bidirectional category assigned to it. For the purposes of
|
||||
the requirements below, an "RandALCat character" is a character that
|
||||
has Unicode bidirectional categories "R" or "AL"; an "LCat character"
|
||||
is a character that has Unicode bidirectional category "L". Note
|
||||
|
||||
|
||||
that there are many characters which fall in neither of the above
|
||||
definitions; Latin digits (<U+0030> through <U+0039>) are examples of
|
||||
this because they have bidirectional category "EN".
|
||||
|
||||
In any profile that specifies bidirectional character handling, all
|
||||
three of the following requirements MUST be met:
|
||||
|
||||
1) The characters in section 5.8 MUST be prohibited.
|
||||
|
||||
2) If a string contains any RandALCat character, the string MUST NOT
|
||||
contain any LCat character.
|
||||
|
||||
3) If a string contains any RandALCat character, a RandALCat
|
||||
character MUST be the first character of the string, and a
|
||||
RandALCat character MUST be the last character of the string.
|
||||
*/
|
||||
|
||||
/* Capacity, in UChars, of each on-stack scratch buffer that usprep_prepare
   tries first; it allocates from the heap only when this is too small. */
#define MAX_STACK_BUFFER_SIZE 300
|
||||
|
||||
|
||||
/*
 * Prepares a string according to a StringPrep profile, following the four
 * steps described above: map, optionally normalize, reject prohibited code
 * points and, when the profile requests it, enforce the bidi requirements.
 *
 * profile      - profile to apply; must not be NULL
 * src          - input text; must not be NULL
 * srcLength    - number of UChars in src, or -1 if src is NUL-terminated
 * dest         - output buffer; may be NULL only when destCapacity is 0
 * destCapacity - capacity of dest in UChars
 * options      - passed through unchanged to usprep_map
 * parseError   - passed to usprep_map and to uprv_syntaxError so that the
 *                position of an offending character can be reported
 * status       - in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR,
 *                U_MEMORY_ALLOCATION_ERROR, U_STRINGPREP_PROHIBITED_ERROR or
 *                U_STRINGPREP_CHECK_BIDI_ERROR by this function, or to
 *                whatever the helpers it calls report
 *
 * Returns 0 if status is NULL or already a failure, or if an argument is
 * invalid. Otherwise returns the value of u_terminateUChars() for the
 * prepared length; callers must check status before using the result.
 */
U_CAPI int32_t U_EXPORT2
usprep_prepare(   const UStringPrepProfile* profile,
                  const UChar* src, int32_t srcLength,
                  UChar* dest, int32_t destCapacity,
                  int32_t options,
                  UParseError* parseError,
                  UErrorCode* status ){

    // check error status
    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    //check arguments
    if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        *status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // b1 receives the mapped text, b2 the normalized text; both start on the stack
    UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
    UChar *b1 = b1Stack, *b2 = b2Stack;
    int32_t b1Len, b2Len=0,
            b1Capacity = MAX_STACK_BUFFER_SIZE ,
            b2Capacity = MAX_STACK_BUFFER_SIZE;
    uint16_t result;
    int32_t b2Index = 0;
    UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    UBool leftToRight=FALSE, rightToLeft=FALSE;
    int32_t rtlPos =-1, ltrPos =-1;
    const int32_t *indexes = profile->indexes;

    // declared up front (without initializers) so that no "goto CLEANUP"
    // jumps across an initialization
    UChar32 ch;
    UStringPrepType type;
    int16_t value;
    UBool isIndex;

    // get the options
    UBool doNFKC    = (UBool)((indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
    UBool checkBiDi = (UBool)((indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);

    //get the string length
    if(srcLength == -1){
        srcLength = u_strlen(src);
    }

    // map
    b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);

    if(*status == U_BUFFER_OVERFLOW_ERROR){
        // redo processing of string
        /* we do not have enough room so grow the buffer*/
        b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
        if(b1==NULL){
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }

        *status = U_ZERO_ERROR; // reset error

        b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
    }

    // normalize
    if(doNFKC){
        b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);

        if(*status == U_BUFFER_OVERFLOW_ERROR){
            // redo processing of string
            /* we do not have enough room so grow the buffer*/
            b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
            if(b2==NULL){
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto CLEANUP;
            }

            *status = U_ZERO_ERROR; // reset error

            // the source is still the mapped text in b1; only the
            // destination changed to the larger heap buffer
            b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
        }
    }else{
        // no normalization requested: b2 simply aliases the mapped text
        b2 = b1;
        b2Len = b1Len;
    }

    if(U_FAILURE(*status)){
        goto CLEANUP;
    }

    // Prohibit and checkBiDi in one pass
    for(b2Index=0; b2Index<b2Len;){

        ch = 0;

        U16_NEXT(b2, b2Index, b2Len, ch);

        UTRIE_GET16(&profile->sprepTrie,ch,result);

        type = getValues(result, value, isIndex);

        if( type == USPREP_PROHIBITED ||
            ((result < _SPREP_TYPE_THRESHOLD) && (result&0x01))){
            *status = U_STRINGPREP_PROHIBITED_ERROR;
            // b2Index indexes b2, so the error context must come from b2
            uprv_syntaxError(b2, b2Index-U16_LENGTH(ch), b2Len, parseError);
            goto CLEANUP;
        }

        direction = u_charDirection(ch);
        if(firstCharDir == U_CHAR_DIRECTION_COUNT){
            firstCharDir = direction;
        }
        // remember where the character starts (not its trail surrogate)
        if(direction == U_LEFT_TO_RIGHT){
            leftToRight = TRUE;
            ltrPos = b2Index-U16_LENGTH(ch);
        }
        if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
            rightToLeft = TRUE;
            rtlPos = b2Index-U16_LENGTH(ch);
        }
    }

    if(checkBiDi){
        // satisfy 2
        if( leftToRight == TRUE && rightToLeft == TRUE){
            *status = U_STRINGPREP_CHECK_BIDI_ERROR;
            uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
            goto CLEANUP;
        }

        //satisfy 3
        // after the loop, direction holds the direction of the last character
        if( rightToLeft == TRUE &&
            !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
              (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
           ){
            *status = U_STRINGPREP_CHECK_BIDI_ERROR;
            uprv_syntaxError(b2, rtlPos, b2Len, parseError);
            // must leave through CLEANUP so heap buffers are released
            goto CLEANUP;
        }
    }

    if(b2Len <= destCapacity){
        uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
    }

CLEANUP:
    // Release b2 first, and only when it is a distinct heap buffer: it may
    // alias b1 (normalization off) or still be the stack buffer.
    if(b2!=b1 && b2!=b2Stack){
        uprv_free(b2);
    }
    if(b1!=b1Stack){
        uprv_free(b1);
    }
    return u_terminateUChars(dest, destCapacity, b2Len, status);
}
|
||||
|
||||
|
||||
/*
 * Tells whether the profile classifies ch as a label separator.
 *
 * Returns FALSE without touching the trie when status is NULL or already a
 * failure. Sets U_ILLEGAL_ARGUMENT_ERROR and returns FALSE when profile is
 * NULL. Otherwise returns TRUE exactly when the type derived from the
 * profile's trie word for ch is USPREP_LABEL_SEPARATOR.
 */
U_CFUNC UBool
usprep_isLabelSeparator(UStringPrepProfile* profile,
                        UChar32 ch, UErrorCode* status){
    // nothing to do if the caller is already in an error state
    if(status==NULL || U_FAILURE(*status)){
        return FALSE;
    }
    // the lookup needs a profile
    if(profile==NULL){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }

    uint16_t trieWord;
    int16_t mappedValue;   // filled in by getValues, not needed here
    UBool valueIsIndex;    // filled in by getValues, not needed here

    // fetch the 16-bit trie word for ch and decode its type
    UTRIE_GET16(&profile->sprepTrie, ch, trieWord);

    UStringPrepType category = getValues(trieWord, mappedValue, valueIsIndex);

    return (UBool)(category == USPREP_LABEL_SEPARATOR);
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_IDNA */
|
|
@ -77,7 +77,7 @@ u_strFromUTF32(UChar *dest,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
|
||||
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
|
||||
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -163,7 +163,7 @@ u_strToUTF32(UChar32 *dest,
|
|||
}
|
||||
|
||||
|
||||
if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
|
||||
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
|
||||
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -234,7 +234,7 @@ u_strFromUTF8(UChar *dest,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
|
||||
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
|
||||
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -332,7 +332,7 @@ u_strToUTF8(char *dest,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
|
||||
if((pSrc==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
|
||||
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -614,7 +614,7 @@ u_strToWCS(wchar_t *dest,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
|
||||
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
|
||||
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -866,7 +866,7 @@ u_strFromWCS(UChar *dest,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
|
||||
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
|
||||
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -34,7 +34,8 @@
|
|||
U_CAPI UNewTrie * U_EXPORT2
|
||||
utrie_open(UNewTrie *fillIn,
|
||||
uint32_t *aliasData, int32_t maxDataLength,
|
||||
uint32_t initialValue, UBool latin1Linear) {
|
||||
uint32_t initialValue, uint32_t leadUnitValue,
|
||||
UBool latin1Linear) {
|
||||
UNewTrie *trie;
|
||||
int32_t i, j;
|
||||
|
||||
|
@ -89,6 +90,7 @@ utrie_open(UNewTrie *fillIn,
|
|||
trie->data[--j]=initialValue;
|
||||
}
|
||||
|
||||
trie->leadUnitValue=leadUnitValue;
|
||||
trie->indexLength=UTRIE_MAX_INDEX_LENGTH;
|
||||
trie->dataCapacity=maxDataLength;
|
||||
trie->isLatin1Linear=latin1Linear;
|
||||
|
@ -118,7 +120,9 @@ utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_
|
|||
isDataAllocated=TRUE;
|
||||
}
|
||||
|
||||
trie=utrie_open(fillIn, aliasData, aliasDataCapacity, other->data[0], other->isLatin1Linear);
|
||||
trie=utrie_open(fillIn, aliasData, aliasDataCapacity,
|
||||
other->data[0], other->leadUnitValue,
|
||||
other->isLatin1Linear);
|
||||
if(trie==NULL) {
|
||||
uprv_free(aliasData);
|
||||
} else {
|
||||
|
@ -396,6 +400,22 @@ utrie_fold(UNewTrie *trie, UNewTrieGetFoldedValue *getFoldedValue, UErrorCode *p
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* set all values for lead surrogate code *units* to leadUnitValue
|
||||
* so that by default runtime lookups will find no data for associated
|
||||
* supplementary code points, unless there is data for such code points
|
||||
* which will result in a non-zero folding value below that is set for
|
||||
* the respective lead units
|
||||
*
|
||||
* the above saved the indexes for surrogate code *points* and
|
||||
* write-protected their data values
|
||||
*/
|
||||
if(!utrie_setRange32(trie, 0xd800, 0xdc00, trie->leadUnitValue, TRUE)) {
|
||||
/* data table overflow */
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fold significant index values into the area just after the BMP indexes.
|
||||
* In case the first lead surrogate has significant data,
|
||||
|
@ -418,15 +438,19 @@ utrie_fold(UNewTrie *trie, UNewTrieGetFoldedValue *getFoldedValue, UErrorCode *p
|
|||
/* is there an identical index block? */
|
||||
block=_findSameIndexBlock(index, indexLength, c>>UTRIE_SHIFT);
|
||||
|
||||
/* get a folded value for [c..c+0x400[ and, if 0, set it for the lead surrogate */
|
||||
/*
|
||||
* get a folded value for [c..c+0x400[ and,
|
||||
* if different from the value for the lead surrogate code point,
|
||||
* set it for the lead surrogate code unit
|
||||
*/
|
||||
value=getFoldedValue(trie, c, block+UTRIE_SURROGATE_BLOCK_COUNT);
|
||||
if(value!=0) {
|
||||
if(!utrie_set32(trie, 0xd7c0+(c>>10), value)) {
|
||||
if(value!=utrie_get32(trie, U16_LEAD(c), NULL)) {
|
||||
if(!utrie_set32(trie, U16_LEAD(c), value)) {
|
||||
/* data table overflow */
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
utrie_get32(trie, U16_LEAD(c), NULL);
|
||||
/* if we did not find an identical index block... */
|
||||
if(block==indexLength) {
|
||||
/* move the actual index (stage 1) entries from the supplementary position to the new one */
|
||||
|
@ -435,6 +459,7 @@ utrie_fold(UNewTrie *trie, UNewTrieGetFoldedValue *getFoldedValue, UErrorCode *p
|
|||
4*UTRIE_SURROGATE_BLOCK_COUNT);
|
||||
indexLength+=UTRIE_SURROGATE_BLOCK_COUNT;
|
||||
}
|
||||
utrie_get32(trie, U16_LEAD(c), NULL);
|
||||
}
|
||||
c+=0x400;
|
||||
} else {
|
||||
|
@ -727,9 +752,11 @@ utrie_serialize(UNewTrie *trie, void *dt, int32_t capacity,
|
|||
|
||||
/* fold the supplementary part of the index array */
|
||||
utrie_fold(trie, getFoldedValue, pErrorCode);
|
||||
utrie_get32(trie, U16_LEAD(0x10400), NULL);
|
||||
|
||||
/* compact again with overlap for minimum data array length */
|
||||
utrie_compact(trie, TRUE, pErrorCode);
|
||||
utrie_get32(trie, U16_LEAD(0x10400), NULL);
|
||||
|
||||
trie->isCompacted=TRUE;
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
|
|
|
@ -492,6 +492,7 @@ struct UNewTrie {
|
|||
int32_t index[UTRIE_MAX_INDEX_LENGTH];
|
||||
uint32_t *data;
|
||||
|
||||
uint32_t leadUnitValue;
|
||||
int32_t indexLength, dataCapacity, dataLength;
|
||||
UBool isAllocated, isDataAllocated;
|
||||
UBool isLatin1Linear, isCompacted;
|
||||
|
@ -546,6 +547,8 @@ UNewTrieGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset);
|
|||
* @param maxDataLength the capacity of aliasData (if not NULL) or
|
||||
* the length of the data array to be allocated
|
||||
* @param initialValue the initial value that is set for all code points
|
||||
* @param leadUnitValue the value for lead surrogate code _units_ that do not
|
||||
* have associated supplementary data
|
||||
* @param latin1Linear a flag indicating whether the Latin-1 range is to be allocated and
|
||||
* kept in a linear, contiguous part of the data array
|
||||
* @return a pointer to the initialized fillIn or the allocated and initialized new UNewTrie
|
||||
|
@ -553,7 +556,8 @@ UNewTrieGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset);
|
|||
U_CAPI UNewTrie * U_EXPORT2
|
||||
utrie_open(UNewTrie *fillIn,
|
||||
uint32_t *aliasData, int32_t maxDataLength,
|
||||
uint32_t initialValue, UBool latin1Linear);
|
||||
uint32_t initialValue, uint32_t leadUnitValue,
|
||||
UBool latin1Linear);
|
||||
|
||||
/**
|
||||
* Clone a build-time trie structure with all entries.
|
||||
|
|
|
@ -136,7 +136,7 @@ package390: $(BUILDDIR)/icudata390.lst $(BUILDDIR)/icudata.lst ./icupkg.inc
|
|||
##### Define all the data files. the build rule that depends on them is below.
|
||||
|
||||
## DAT files - Misc. data files.
|
||||
DAT_FILES_SHORT=uprops.icu pnames.icu unames.icu unorm.icu cnvalias.icu tz.icu ucadata.icu invuca.icu uidna.icu
|
||||
DAT_FILES_SHORT=uprops.icu pnames.icu unames.icu unorm.icu cnvalias.icu tz.icu ucadata.icu invuca.icu uidna.spp
|
||||
DAT_FILES=$(DAT_FILES_SHORT:%=$(BUILDDIR)/$(ICUDT)%)
|
||||
|
||||
## BRK files
|
||||
|
@ -231,9 +231,9 @@ $(BUILDDIR)/$(ICUDT)cnvalias.icu: $(UCMSRCDIR)/convrtrs.txt $(TOOLDIR)/gencnval/
|
|||
$(BUILDDIR)/$(ICUDT)tz.icu: $(MISCSRCDIR)/timezone.txt $(TOOLDIR)/gentz/gentz$(EXEEXT)
|
||||
ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/gentz/gentz -d $(BUILDDIR) $(MISCSRCDIR)/timezone.txt
|
||||
|
||||
# uidna.icu
|
||||
$(BUILDDIR)/$(ICUDT)uidna.icu: $(MISCSRCDIR)/rfc3454_A_1.txt $(MISCSRCDIR)/rfc3454_B_1.txt $(MISCSRCDIR)/rfc3454_B_2.txt $(MISCSRCDIR)/rfc3454_C_X.txt $(TOOLDIR)/genidna/genidna$(EXEEXT)
|
||||
ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/genidna/genidna -d $(BUILDDIR) -s $(SRCDATADIR)
|
||||
# uidna.spp
|
||||
# gensprep reads NamePrepProfile.txt from the directory given with -s, which
# must be the same MISCSRCDIR that the prerequisite below refers to.
$(BUILDDIR)/$(ICUDT)uidna.spp: $(MISCSRCDIR)/NamePrepProfile.txt $(TOOLDIR)/gensprep/gensprep$(EXEEXT)
	ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/gensprep/gensprep -d $(BUILDDIR) -s $(MISCSRCDIR) -b uidna -n $(UNICODEDATADIR) -u 3.2.0 -k NamePrepProfile.txt
|
||||
|
||||
#################################################### BRK
|
||||
# BRK FILES
|
||||
|
@ -314,7 +314,7 @@ TESTDATA=testdata
|
|||
TESTDT=$(TESTDATA)_
|
||||
|
||||
# File definitions
|
||||
TEST_DAT_FILES=$(TESTBUILDDIR)/$(TESTDT)test.icu
|
||||
TEST_DAT_FILES=$(TESTBUILDDIR)/$(TESTDT)test.icu $(TESTBUILDDIR)/$(TESTDT)nfscsi.spp $(TESTBUILDDIR)/$(TESTDT)nfscss.spp $(TESTBUILDDIR)/$(TESTDT)nfscis.spp $(TESTBUILDDIR)/$(TESTDT)nfsmxs.spp $(TESTBUILDDIR)/$(TESTDT)nfsmxp.spp
|
||||
|
||||
TEST_UCM_SOURCE= test1.ucm test3.ucm test4.ucm ibm9027.ucm
|
||||
TEST_UCM_FILES=$(TEST_UCM_SOURCE:%=$(TESTSRCDATADIR)/data/%)
|
||||
|
@ -341,6 +341,31 @@ build-testdata: $(ALL_TEST_FILES) $(TESTBUILDDIR)/testdata.lst $(TESTBUILDDIR)/
|
|||
$(TESTBUILDDIR)/$(TESTDT)test.icu: $(TOOLDIR)/gentest/gentest$(EXEEXT)
|
||||
ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/gentest/gentest -d $(TESTBUILDDIR)
|
||||
|
||||
# Targets for nfscsi.spp
# (TEST_DAT_FILES lists this file with the .spp suffix, so the target must match)
$(TESTBUILDDIR)/$(TESTDT)nfscsi.spp: $(TOOLDIR)/gensprep/gensprep$(EXEEXT) $(TESTSRCDATADIR)/nfs4_cs_prep_ci.txt
	@echo Building nfscsi.spp
	ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/gensprep/gensprep -s $(TESTSRCDATADIR) -d $(TESTBUILDDIR) -b nfscsi -p $(TESTDATA) -u 3.2.0 nfs4_cs_prep_ci.txt
|
||||
|
||||
# Targets for nfscss.spp
# (TEST_DAT_FILES lists this file with the .spp suffix, so the target must match)
$(TESTBUILDDIR)/$(TESTDT)nfscss.spp: $(TOOLDIR)/gensprep/gensprep$(EXEEXT) $(TESTSRCDATADIR)/nfs4_cs_prep_cs.txt
	@echo Building nfscss.spp
	ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/gensprep/gensprep -s $(TESTSRCDATADIR) -d $(TESTBUILDDIR) -b nfscss -p $(TESTDATA) -u 3.2.0 nfs4_cs_prep_cs.txt
|
||||
|
||||
# Targets for nfscis.spp
|
||||
$(TESTBUILDDIR)/$(TESTDT)nfscis.spp: $(TOOLDIR)/gensprep/gensprep$(EXEEXT) $(TESTSRCDATADIR)/nfs4_cis_prep.txt
|
||||
@echo Building nfscis.spp
|
||||
ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/gensprep/gensprep -s $(TESTSRCDATADIR) -d $(TESTBUILDDIR) -b nfscis -p $(TESTDATA) -k -n $(UNICODEDATADIR) -u 3.2.0 nfs4_cis_prep.txt
|
||||
|
||||
# Targets for nfsmxs.spp
|
||||
$(TESTBUILDDIR)/$(TESTDT)nfsmxs.spp: $(TOOLDIR)/gensprep/gensprep$(EXEEXT) $(TESTSRCDATADIR)/nfs4_mixed_prep_s.txt
|
||||
@echo Building nfsmxs.spp
|
||||
ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/gensprep/gensprep -s $(TESTSRCDATADIR) -d $(TESTBUILDDIR) -b nfsmxs -p $(TESTDATA) -k -n $(UNICODEDATADIR) -u 3.2.0 nfs4_mixed_prep_s.txt
|
||||
|
||||
# Targets for nfsmxp.spp
|
||||
$(TESTBUILDDIR)/$(TESTDT)nfsmxp.spp: $(TOOLDIR)/gensprep/gensprep$(EXEEXT) $(TESTSRCDATADIR)/nfs4_mixed_prep_p.txt
|
||||
@echo Building nfsmxp.spp
|
||||
ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/gensprep/gensprep -s $(TESTSRCDATADIR) -d $(TESTBUILDDIR) -b nfsmxp -p $(TESTDATA) -k -n $(UNICODEDATADIR) -u 3.2.0 nfs4_mixed_prep_p.txt
|
||||
|
||||
$(TESTBUILDDIR)/$(TESTDT)%.cnv: $(TESTSRCDATADIR)/%.ucm $(TOOLDIR)/makeconv/makeconv$(EXEEXT)
|
||||
ICU_DATA=$(BUILDDIR) $(INVOKE) $(TOOLDIR)/makeconv/makeconv -p $(TESTDATA) -c -d $(TESTBUILDDIR) $(TESTSRCDATADIR)/$(<F)
|
||||
|
||||
|
|
|
@ -245,7 +245,7 @@ BRK_FILES = $(ICUDT)sent.brk $(ICUDT)char.brk $(ICUDT)line.brk $(ICUDT)word.brk
|
|||
# move the .dll and .lib files to their final destination afterwards.
|
||||
# The $(U_ICUDATA_NAME).lib and $(U_ICUDATA_NAME).exp should already be in the right place due to stubdata.
|
||||
#
|
||||
"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" : "$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata.exe" $(CNV_FILES) $(BRK_FILES) "$(ICUBLD)\$(ICUDT)uprops.icu" "$(ICUBLD)\$(ICUDT)unames.icu" "$(ICUBLD)\$(ICUDT)pnames.icu" "$(ICUBLD)\$(ICUDT)unorm.icu" "$(ICUBLD)\$(ICUDT)cnvalias.icu" "$(ICUBLD)\$(ICUDT)tz.icu" "$(ICUBLD)\$(ICUDT)ucadata.icu" "$(ICUBLD)\$(ICUDT)invuca.icu" "$(ICUBLD)\$(ICUDT)uidna.icu" $(ALL_RES) "$(ICUBLD)\$(ICUDT)icudata.res" "$(ICUP)\source\stubdata\stubdatabuilt.txt"
|
||||
"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" : "$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata.exe" $(CNV_FILES) $(BRK_FILES) "$(ICUBLD)\$(ICUDT)uprops.icu" "$(ICUBLD)\$(ICUDT)unames.icu" "$(ICUBLD)\$(ICUDT)pnames.icu" "$(ICUBLD)\$(ICUDT)unorm.icu" "$(ICUBLD)\$(ICUDT)cnvalias.icu" "$(ICUBLD)\$(ICUDT)tz.icu" "$(ICUBLD)\$(ICUDT)ucadata.icu" "$(ICUBLD)\$(ICUDT)invuca.icu" "$(ICUBLD)\$(ICUDT)uidna.spp" $(ALL_RES) "$(ICUBLD)\$(ICUDT)icudata.res" "$(ICUP)\source\stubdata\stubdatabuilt.txt"
|
||||
@echo Building icu data
|
||||
@cd "$(ICUBLD)"
|
||||
@"$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata" -f -e $(U_ICUDATA_NAME) -v -m dll -c -p $(ICUPKG) -O "$(PKGOPT)" -d "$(ICUBLD)" -s . <<pkgdatain.txt
|
||||
|
@ -255,7 +255,7 @@ $(ICUDT)pnames.icu
|
|||
$(ICUDT)unames.icu
|
||||
$(ICUDT)ucadata.icu
|
||||
$(ICUDT)invuca.icu
|
||||
$(ICUDT)uidna.icu
|
||||
$(ICUDT)uidna.spp
|
||||
$(ICUDT)tz.icu
|
||||
$(ICUDT)cnvalias.icu
|
||||
$(CNV_FILES:.cnv =.cnv
|
||||
|
@ -409,9 +409,9 @@ res_index {
|
|||
@set ICU_DATA=$(ICUBLD)
|
||||
@"$(ICUTOOLS)\genuca\$(CFG)\genuca" -s "$(ICUUNIDATA)"
|
||||
|
||||
# Targets for uidna.icu
|
||||
"$(ICUBLD)\$(ICUDT)uidna.icu" : "$(ICUUNIDATA)\*.txt" "$(ICUMISC)\*.txt"
|
||||
genidna -s "$(ICUDATA)" -d "$(ICUBLD)\\"
|
||||
# Targets for uidna.spp
|
||||
"$(ICUBLD)\$(ICUDT)uidna.spp" : "$(ICUUNIDATA)\*.txt" "$(ICUMISC)\NamePrepProfile.txt"
|
||||
gensprep -s "$(ICUMISC)" -d "$(ICUBLD)\\" -b uidna -n "$(ICUUNIDATA)" -k -u 3.2.0 NamePrepProfile.txt
|
||||
|
||||
# Dependencies on the tools for the batch inference rules
|
||||
|
||||
|
|
1952
icu4c/source/data/misc/NamePrepProfile.txt
Normal file
1952
icu4c/source/data/misc/NamePrepProfile.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,407 +0,0 @@
|
|||
###################
|
||||
# Copyright (C) 2003, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
|
||||
#################
|
||||
|
||||
# This file contains code points from Table A.1 from RFC 3454
|
||||
|
||||
0221;
|
||||
0234..024F;
|
||||
02AE..02AF;
|
||||
02EF..02FF;
|
||||
0350..035F;
|
||||
0370..0373;
|
||||
0376..0379;
|
||||
037B..037D;
|
||||
037F..0383;
|
||||
038B;
|
||||
038D;
|
||||
03A2;
|
||||
03CF;
|
||||
03F7..03FF;
|
||||
0487;
|
||||
04CF;
|
||||
04F6..04F7;
|
||||
04FA..04FF;
|
||||
0510..0530;
|
||||
0557..0558;
|
||||
0560;
|
||||
0588;
|
||||
058B..0590;
|
||||
05A2;
|
||||
05BA;
|
||||
05C5..05CF;
|
||||
05EB..05EF;
|
||||
05F5..060B;
|
||||
060D..061A;
|
||||
061C..061E;
|
||||
0620;
|
||||
063B..063F;
|
||||
0656..065F;
|
||||
06EE..06EF;
|
||||
06FF;
|
||||
070E;
|
||||
072D..072F;
|
||||
074B..077F;
|
||||
07B2..0900;
|
||||
0904;
|
||||
093A..093B;
|
||||
094E..094F;
|
||||
0955..0957;
|
||||
0971..0980;
|
||||
0984;
|
||||
098D..098E;
|
||||
0991..0992;
|
||||
09A9;
|
||||
09B1;
|
||||
09B3..09B5;
|
||||
09BA..09BB;
|
||||
09BD;
|
||||
09C5..09C6;
|
||||
09C9..09CA;
|
||||
09CE..09D6;
|
||||
09D8..09DB;
|
||||
09DE;
|
||||
09E4..09E5;
|
||||
09FB..0A01;
|
||||
0A03..0A04;
|
||||
0A0B..0A0E;
|
||||
0A11..0A12;
|
||||
0A29;
|
||||
0A31;
|
||||
0A34;
|
||||
0A37;
|
||||
0A3A..0A3B;
|
||||
0A3D;
|
||||
0A43..0A46;
|
||||
0A49..0A4A;
|
||||
0A4E..0A58;
|
||||
0A5D;
|
||||
0A5F..0A65;
|
||||
0A75..0A80;
|
||||
0A84;
|
||||
0A8C;
|
||||
0A8E;
|
||||
0A92;
|
||||
0AA9;
|
||||
0AB1;
|
||||
0AB4;
|
||||
0ABA..0ABB;
|
||||
0AC6;
|
||||
0ACA;
|
||||
0ACE..0ACF;
|
||||
0AD1..0ADF;
|
||||
0AE1..0AE5;
|
||||
0AF0..0B00;
|
||||
0B04;
|
||||
0B0D..0B0E;
|
||||
0B11..0B12;
|
||||
0B29;
|
||||
0B31;
|
||||
0B34..0B35;
|
||||
0B3A..0B3B;
|
||||
0B44..0B46;
|
||||
0B49..0B4A;
|
||||
0B4E..0B55;
|
||||
0B58..0B5B;
|
||||
0B5E;
|
||||
0B62..0B65;
|
||||
0B71..0B81;
|
||||
0B84;
|
||||
0B8B..0B8D;
|
||||
0B91;
|
||||
0B96..0B98;
|
||||
0B9B;
|
||||
0B9D;
|
||||
0BA0..0BA2;
|
||||
0BA5..0BA7;
|
||||
0BAB..0BAD;
|
||||
0BB6;
|
||||
0BBA..0BBD;
|
||||
0BC3..0BC5;
|
||||
0BC9;
|
||||
0BCE..0BD6;
|
||||
0BD8..0BE6;
|
||||
0BF3..0C00;
|
||||
0C04;
|
||||
0C0D;
|
||||
0C11;
|
||||
0C29;
|
||||
0C34;
|
||||
0C3A..0C3D;
|
||||
0C45;
|
||||
0C49;
|
||||
0C4E..0C54;
|
||||
0C57..0C5F;
|
||||
0C62..0C65;
|
||||
0C70..0C81;
|
||||
0C84;
|
||||
0C8D;
|
||||
0C91;
|
||||
0CA9;
|
||||
0CB4;
|
||||
0CBA..0CBD;
|
||||
0CC5;
|
||||
0CC9;
|
||||
0CCE..0CD4;
|
||||
0CD7..0CDD;
|
||||
0CDF;
|
||||
0CE2..0CE5;
|
||||
0CF0..0D01;
|
||||
0D04;
|
||||
0D0D;
|
||||
0D11;
|
||||
0D29;
|
||||
0D3A..0D3D;
|
||||
0D44..0D45;
|
||||
0D49;
|
||||
0D4E..0D56;
|
||||
0D58..0D5F;
|
||||
0D62..0D65;
|
||||
0D70..0D81;
|
||||
0D84;
|
||||
0D97..0D99;
|
||||
0DB2;
|
||||
0DBC;
|
||||
0DBE..0DBF;
|
||||
0DC7..0DC9;
|
||||
0DCB..0DCE;
|
||||
0DD5;
|
||||
0DD7;
|
||||
0DE0..0DF1;
|
||||
0DF5..0E00;
|
||||
0E3B..0E3E;
|
||||
0E5C..0E80;
|
||||
0E83;
|
||||
0E85..0E86;
|
||||
0E89;
|
||||
0E8B..0E8C;
|
||||
0E8E..0E93;
|
||||
0E98;
|
||||
0EA0;
|
||||
0EA4;
|
||||
0EA6;
|
||||
0EA8..0EA9;
|
||||
0EAC;
|
||||
0EBA;
|
||||
0EBE..0EBF;
|
||||
0EC5;
|
||||
0EC7;
|
||||
0ECE..0ECF;
|
||||
0EDA..0EDB;
|
||||
0EDE..0EFF;
|
||||
0F48;
|
||||
0F6B..0F70;
|
||||
0F8C..0F8F;
|
||||
0F98;
|
||||
0FBD;
|
||||
0FCD..0FCE;
|
||||
0FD0..0FFF;
|
||||
1022;
|
||||
1028;
|
||||
102B;
|
||||
1033..1035;
|
||||
103A..103F;
|
||||
105A..109F;
|
||||
10C6..10CF;
|
||||
10F9..10FA;
|
||||
10FC..10FF;
|
||||
115A..115E;
|
||||
11A3..11A7;
|
||||
11FA..11FF;
|
||||
1207;
|
||||
1247;
|
||||
1249;
|
||||
124E..124F;
|
||||
1257;
|
||||
1259;
|
||||
125E..125F;
|
||||
1287;
|
||||
1289;
|
||||
128E..128F;
|
||||
12AF;
|
||||
12B1;
|
||||
12B6..12B7;
|
||||
12BF;
|
||||
12C1;
|
||||
12C6..12C7;
|
||||
12CF;
|
||||
12D7;
|
||||
12EF;
|
||||
130F;
|
||||
1311;
|
||||
1316..1317;
|
||||
131F;
|
||||
1347;
|
||||
135B..1360;
|
||||
137D..139F;
|
||||
13F5..1400;
|
||||
1677..167F;
|
||||
169D..169F;
|
||||
16F1..16FF;
|
||||
170D;
|
||||
1715..171F;
|
||||
1737..173F;
|
||||
1754..175F;
|
||||
176D;
|
||||
1771;
|
||||
1774..177F;
|
||||
17DD..17DF;
|
||||
17EA..17FF;
|
||||
180F;
|
||||
181A..181F;
|
||||
1878..187F;
|
||||
18AA..1DFF;
|
||||
1E9C..1E9F;
|
||||
1EFA..1EFF;
|
||||
1F16..1F17;
|
||||
1F1E..1F1F;
|
||||
1F46..1F47;
|
||||
1F4E..1F4F;
|
||||
1F58;
|
||||
1F5A;
|
||||
1F5C;
|
||||
1F5E;
|
||||
1F7E..1F7F;
|
||||
1FB5;
|
||||
1FC5;
|
||||
1FD4..1FD5;
|
||||
1FDC;
|
||||
1FF0..1FF1;
|
||||
1FF5;
|
||||
1FFF;
|
||||
2053..2056;
|
||||
2058..205E;
|
||||
2064..2069;
|
||||
2072..2073;
|
||||
208F..209F;
|
||||
20B2..20CF;
|
||||
20EB..20FF;
|
||||
213B..213C;
|
||||
214C..2152;
|
||||
2184..218F;
|
||||
23CF..23FF;
|
||||
2427..243F;
|
||||
244B..245F;
|
||||
24FF;
|
||||
2614..2615;
|
||||
2618;
|
||||
267E..267F;
|
||||
268A..2700;
|
||||
2705;
|
||||
270A..270B;
|
||||
2728;
|
||||
274C;
|
||||
274E;
|
||||
2753..2755;
|
||||
2757;
|
||||
275F..2760;
|
||||
2795..2797;
|
||||
27B0;
|
||||
27BF..27CF;
|
||||
27EC..27EF;
|
||||
2B00..2E7F;
|
||||
2E9A;
|
||||
2EF4..2EFF;
|
||||
2FD6..2FEF;
|
||||
2FFC..2FFF;
|
||||
3040;
|
||||
3097..3098;
|
||||
3100..3104;
|
||||
312D..3130;
|
||||
318F;
|
||||
31B8..31EF;
|
||||
321D..321F;
|
||||
3244..3250;
|
||||
327C..327E;
|
||||
32CC..32CF;
|
||||
32FF;
|
||||
3377..337A;
|
||||
33DE..33DF;
|
||||
33FF;
|
||||
4DB6..4DFF;
|
||||
9FA6..9FFF;
|
||||
A48D..A48F;
|
||||
A4C7..ABFF;
|
||||
D7A4..D7FF;
|
||||
FA2E..FA2F;
|
||||
FA6B..FAFF;
|
||||
FB07..FB12;
|
||||
FB18..FB1C;
|
||||
FB37;
|
||||
FB3D;
|
||||
FB3F;
|
||||
FB42;
|
||||
FB45;
|
||||
FBB2..FBD2;
|
||||
FD40..FD4F;
|
||||
FD90..FD91;
|
||||
FDC8..FDCF;
|
||||
FDFD..FDFF;
|
||||
FE10..FE1F;
|
||||
FE24..FE2F;
|
||||
FE47..FE48;
|
||||
FE53;
|
||||
FE67;
|
||||
FE6C..FE6F;
|
||||
FE75;
|
||||
FEFD..FEFE;
|
||||
FF00;
|
||||
FFBF..FFC1;
|
||||
FFC8..FFC9;
|
||||
FFD0..FFD1;
|
||||
FFD8..FFD9;
|
||||
FFDD..FFDF;
|
||||
FFE7;
|
||||
FFEF..FFF8;
|
||||
10000..102FF;
|
||||
1031F;
|
||||
10324..1032F;
|
||||
1034B..103FF;
|
||||
10426..10427;
|
||||
1044E..1CFFF;
|
||||
1D0F6..1D0FF;
|
||||
1D127..1D129;
|
||||
1D1DE..1D3FF;
|
||||
1D455;
|
||||
1D49D;
|
||||
1D4A0..1D4A1;
|
||||
1D4A3..1D4A4;
|
||||
1D4A7..1D4A8;
|
||||
1D4AD;
|
||||
1D4BA;
|
||||
1D4BC;
|
||||
1D4C1;
|
||||
1D4C4;
|
||||
1D506;
|
||||
1D50B..1D50C;
|
||||
1D515;
|
||||
1D51D;
|
||||
1D53A;
|
||||
1D53F;
|
||||
1D545;
|
||||
1D547..1D549;
|
||||
1D551;
|
||||
1D6A4..1D6A7;
|
||||
1D7CA..1D7CD;
|
||||
1D800..1FFFD;
|
||||
2A6D7..2F7FF;
|
||||
2FA1E..2FFFD;
|
||||
30000..3FFFD;
|
||||
40000..4FFFD;
|
||||
50000..5FFFD;
|
||||
60000..6FFFD;
|
||||
70000..7FFFD;
|
||||
80000..8FFFD;
|
||||
90000..9FFFD;
|
||||
A0000..AFFFD;
|
||||
B0000..BFFFD;
|
||||
C0000..CFFFD;
|
||||
D0000..DFFFD;
|
||||
E0000;
|
||||
E0002..E001F;
|
||||
E0080..EFFFD;
|
||||
|
||||
# Total code points 3653
|
||||
|
|
@ -1,38 +0,0 @@
|
|||
###################
|
||||
# Copyright (C) 2003, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
|
||||
#################
|
||||
|
||||
# This file contains code points from Table B.1 from RFC 3454
|
||||
|
||||
00AD; ; Map to nothing
|
||||
034F; ; Map to nothing
|
||||
1806; ; Map to nothing
|
||||
180B; ; Map to nothing
|
||||
180C; ; Map to nothing
|
||||
180D; ; Map to nothing
|
||||
200B; ; Map to nothing
|
||||
200C; ; Map to nothing
|
||||
200D; ; Map to nothing
|
||||
2060; ; Map to nothing
|
||||
FE00; ; Map to nothing
|
||||
FE01; ; Map to nothing
|
||||
FE02; ; Map to nothing
|
||||
FE03; ; Map to nothing
|
||||
FE04; ; Map to nothing
|
||||
FE05; ; Map to nothing
|
||||
FE06; ; Map to nothing
|
||||
FE07; ; Map to nothing
|
||||
FE08; ; Map to nothing
|
||||
FE09; ; Map to nothing
|
||||
FE0A; ; Map to nothing
|
||||
FE0B; ; Map to nothing
|
||||
FE0C; ; Map to nothing
|
||||
FE0D; ; Map to nothing
|
||||
FE0E; ; Map to nothing
|
||||
FE0F; ; Map to nothing
|
||||
FEFF; ; Map to nothing
|
||||
|
||||
# Total code points 27
|
||||
|
File diff suppressed because it is too large
Load diff
|
@ -1,182 +0,0 @@
|
|||
###################
|
||||
# Copyright (C) 2003, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
|
||||
#################
|
||||
|
||||
# code points from Table C.1.1
|
||||
|
||||
0020; SPACE
|
||||
|
||||
# Total code points 1
|
||||
|
||||
###################
|
||||
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
|
||||
#################
|
||||
|
||||
# code points from Table C.1.2
|
||||
|
||||
00A0; NO..BREAK SPACE
|
||||
1680; OGHAM SPACE MARK
|
||||
2000; EN QUAD
|
||||
2001; EM QUAD
|
||||
2002; EN SPACE
|
||||
2003; EM SPACE
|
||||
2004; THREE..PER-EM SPACE
|
||||
2005; FOUR..PER-EM SPACE
|
||||
2006; SIX..PER-EM SPACE
|
||||
2007; FIGURE SPACE
|
||||
2008; PUNCTUATION SPACE
|
||||
2009; THIN SPACE
|
||||
200A; HAIR SPACE
|
||||
200B; ZERO WIDTH SPACE
|
||||
202F; NARROW NO..BREAK SPACE
|
||||
205F; MEDIUM MATHEMATICAL SPACE
|
||||
3000; IDEOGRAPHIC SPACE
|
||||
|
||||
# Total code points 13
|
||||
|
||||
###################
|
||||
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
|
||||
#################
|
||||
|
||||
# code points from Table C.2.1
|
||||
|
||||
0000..001F; [CONTROL CHARACTERS]
|
||||
007F; DELETE
|
||||
|
||||
# Total code points 18
|
||||
|
||||
###################
|
||||
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
|
||||
#################
|
||||
|
||||
# code points from Table C.2.2
|
||||
|
||||
0080..009F; [CONTROL CHARACTERS]
|
||||
06DD; ARABIC END OF AYAH
|
||||
070F; SYRIAC ABBREVIATION MARK
|
||||
180E; MONGOLIAN VOWEL SEPARATOR
|
||||
200C; ZERO WIDTH NON..JOINER
|
||||
200D; ZERO WIDTH JOINER
|
||||
2028; LINE SEPARATOR
|
||||
2029; PARAGRAPH SEPARATOR
|
||||
2060; WORD JOINER
|
||||
2061; FUNCTION APPLICATION
|
||||
2062; INVISIBLE TIMES
|
||||
2063; INVISIBLE SEPARATOR
|
||||
206A..206F; [CONTROL CHARACTERS]
|
||||
FEFF; ZERO WIDTH NO..BREAK SPACE
|
||||
FFF9..FFFC; [CONTROL CHARACTERS]
|
||||
1D173..1D17A; [MUSICAL CONTROL CHARACTERS]
|
||||
|
||||
# Total code points 29
|
||||
|
||||
###################
|
||||
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
|
||||
#################
|
||||
|
||||
# code points from Table C.3
|
||||
|
||||
E000..F8FF; [PRIVATE USE, PLANE 0]
|
||||
F0000..FFFFD; [PRIVATE USE, PLANE 15]
|
||||
100000..10FFFD; [PRIVATE USE, PLANE 16]
|
||||
|
||||
# Total code points 2051
|
||||
|
||||
###################
|
||||
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
|
||||
#################
|
||||
|
||||
# code points from Table C.4
|
||||
|
||||
FDD0..FDEF; [NONCHARACTER CODE POINTS]
|
||||
FFFE..FFFF; [NONCHARACTER CODE POINTS]
|
||||
1FFFE..1FFFF; [NONCHARACTER CODE POINTS]
|
||||
2FFFE..2FFFF; [NONCHARACTER CODE POINTS]
|
||||
3FFFE..3FFFF; [NONCHARACTER CODE POINTS]
|
||||
4FFFE..4FFFF; [NONCHARACTER CODE POINTS]
|
||||
5FFFE..5FFFF; [NONCHARACTER CODE POINTS]
|
||||
6FFFE..6FFFF; [NONCHARACTER CODE POINTS]
|
||||
7FFFE..7FFFF; [NONCHARACTER CODE POINTS]
|
||||
8FFFE..8FFFF; [NONCHARACTER CODE POINTS]
|
||||
9FFFE..9FFFF; [NONCHARACTER CODE POINTS]
|
||||
AFFFE..AFFFF; [NONCHARACTER CODE POINTS]
|
||||
BFFFE..BFFFF; [NONCHARACTER CODE POINTS]
|
||||
CFFFE..CFFFF; [NONCHARACTER CODE POINTS]
|
||||
DFFFE..DFFFF; [NONCHARACTER CODE POINTS]
|
||||
EFFFE..EFFFF; [NONCHARACTER CODE POINTS]
|
||||
FFFFE..FFFFF; [NONCHARACTER CODE POINTS]
|
||||
10FFFE..10FFFF; [NONCHARACTER CODE POINTS]
|
||||
|
||||
# Total code points 18
|
||||
|
||||
###################
|
||||
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
|
||||
#################
|
||||
|
||||
# code points from Table C.5
|
||||
|
||||
D800..DFFF; [SURROGATE CODES]
|
||||
|
||||
# Total code points 0
|
||||
|
||||
###################
|
||||
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
|
||||
#################
|
||||
|
||||
# code points from Table C.6
|
||||
|
||||
FFF9; INTERLINEAR ANNOTATION ANCHOR
|
||||
FFFA; INTERLINEAR ANNOTATION SEPARATOR
|
||||
FFFB; INTERLINEAR ANNOTATION TERMINATOR
|
||||
FFFC; OBJECT REPLACEMENT CHARACTER
|
||||
FFFD; REPLACEMENT CHARACTER
|
||||
|
||||
# Total code points 5
|
||||
|
||||
###################
|
||||
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
|
||||
#################
|
||||
|
||||
# code points from Table C.7
|
||||
|
||||
2FF0..2FFB; [IDEOGRAPHIC DESCRIPTION CHARACTERS]
|
||||
|
||||
# Total code points 1
|
||||
|
||||
###################
|
||||
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
|
||||
#################
|
||||
|
||||
# code points from Table C.8
|
||||
|
||||
0340; COMBINING GRAVE TONE MARK
|
||||
0341; COMBINING ACUTE TONE MARK
|
||||
200E; LEFT..TO-RIGHT MARK
|
||||
200F; RIGHT..TO-LEFT MARK
|
||||
202A; LEFT..TO-RIGHT EMBEDDING
|
||||
202B; RIGHT..TO-LEFT EMBEDDING
|
||||
202C; POP DIRECTIONAL FORMATTING
|
||||
202D; LEFT..TO-RIGHT OVERRIDE
|
||||
202E; RIGHT..TO-LEFT OVERRIDE
|
||||
206A; INHIBIT SYMMETRIC SWAPPING
|
||||
206B; ACTIVATE SYMMETRIC SWAPPING
|
||||
206C; INHIBIT ARABIC FORM SHAPING
|
||||
206D; ACTIVATE ARABIC FORM SHAPING
|
||||
206E; NATIONAL DIGIT SHAPES
|
||||
206F; NOMINAL DIGIT SHAPES
|
||||
|
||||
# Total code points 9
|
||||
|
||||
###################
|
||||
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
|
||||
#################
|
||||
|
||||
# code points from Table C.9
|
||||
|
||||
E0001; LANGUAGE TAG
|
||||
E0020..E007F; [TAGGING CHARACTERS]
|
||||
|
||||
# Total code points 82
|
||||
|
|
@ -1082,7 +1082,7 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st
|
|||
}
|
||||
|
||||
|
||||
tempUCATable *t = uprv_uca_initTempTable(image, src->opts, src->UCA, NOT_FOUND_TAG, status);
|
||||
tempUCATable *t = uprv_uca_initTempTable(image, src->opts, src->UCA, NOT_FOUND_TAG, NOT_FOUND_TAG, status);
|
||||
|
||||
|
||||
/* After this, we have assigned CE values to all regular CEs */
|
||||
|
|
|
@ -108,7 +108,7 @@ static int32_t uprv_uca_addExpansion(ExpansionTable *expansions, uint32_t value,
|
|||
}
|
||||
|
||||
U_CAPI tempUCATable* U_EXPORT2
|
||||
uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UErrorCode *status) {
|
||||
uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status) {
|
||||
tempUCATable *t = (tempUCATable *)uprv_malloc(sizeof(tempUCATable));
|
||||
/* test for NULL */
|
||||
if (t == NULL) {
|
||||
|
@ -147,7 +147,12 @@ uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollat
|
|||
}
|
||||
uprv_memset(t->expansions, 0, sizeof(ExpansionTable));
|
||||
/*t->mapping = ucmpe32_open(UCOL_SPECIAL_FLAG | (initTag<<24), UCOL_SPECIAL_FLAG | (SURROGATE_TAG<<24), UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG<<24), status);*/
|
||||
t->mapping = utrie_open(NULL, NULL, 0x100000, UCOL_SPECIAL_FLAG | (initTag<<24), TRUE); // Do your own mallocs for the structure, array and have linear Latin 1
|
||||
/*t->mapping = utrie_open(NULL, NULL, 0x100000, UCOL_SPECIAL_FLAG | (initTag<<24), TRUE); // Do your own mallocs for the structure, array and have linear Latin 1*/
|
||||
|
||||
t->mapping = utrie_open(NULL, NULL, 0x100000,
|
||||
UCOL_SPECIAL_FLAG | (initTag<<24),
|
||||
UCOL_SPECIAL_FLAG | (supplementaryInitTag << 24),
|
||||
TRUE); // Do your own mallocs for the structure, array and have linear Latin 1
|
||||
t->prefixLookup = uhash_open(prefixLookupHash, prefixLookupComp, status);
|
||||
uhash_setValueDeleter(t->prefixLookup, uhash_freeBlock);
|
||||
|
||||
|
@ -1357,11 +1362,13 @@ uprv_uca_assembleTable(tempUCATable *t, UErrorCode *status) {
|
|||
// This is debug code to dump the contents of the trie. It needs two functions defined above
|
||||
{
|
||||
UTrie UCAt = { 0 };
|
||||
uint32_t trieWord;
|
||||
utrie_unserialize(&UCAt, dataStart+tableOffset, 9999999, status);
|
||||
UCAt.getFoldingOffset = myGetFoldingOffset;
|
||||
if(U_SUCCESS(*status)) {
|
||||
utrie_enum(&UCAt, NULL, enumRange, NULL);
|
||||
}
|
||||
trieWord = UTRIE_GET32_FROM_LEAD(UCAt, 0xDC01)
|
||||
}
|
||||
#endif
|
||||
tableOffset += paddedsize(mappingSize);
|
||||
|
|
|
@ -91,7 +91,7 @@ typedef struct {
|
|||
UHashtable *prefixLookup;
|
||||
} tempUCATable;
|
||||
|
||||
U_CAPI tempUCATable * U_EXPORT2 uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UErrorCode *status);
|
||||
U_CAPI tempUCATable * U_EXPORT2 uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status);
|
||||
U_CAPI tempUCATable * U_EXPORT2 uprv_uca_cloneTempTable(tempUCATable *t, UErrorCode *status);
|
||||
U_CAPI void U_EXPORT2 uprv_uca_closeTempTable(tempUCATable *t);
|
||||
U_CAPI uint32_t U_EXPORT2 uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status);
|
||||
|
|
|
@ -33,6 +33,8 @@ void addCompactArrayTest(TestNode** root);
|
|||
void addTestDeprecatedAPI(TestNode** root);
|
||||
void addUCharTransformTest(TestNode** root);
|
||||
void addUSetTest(TestNode** root);
|
||||
void addUStringPrepTest(TestNode** root);
|
||||
void addIDNATest(TestNode** root);
|
||||
|
||||
void addAllTests(TestNode** root)
|
||||
{
|
||||
|
@ -61,5 +63,9 @@ void addAllTests(TestNode** root)
|
|||
#endif
|
||||
addUSetTest(root);
|
||||
addTestDeprecatedAPI(root);
|
||||
#if !UCONFIG_NO_IDNA
|
||||
addUStringPrepTest(root);
|
||||
addIDNATest(root);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -605,5 +605,29 @@ SOURCE=.\utf8tst.c
|
|||
SOURCE=.\utransts.c
|
||||
# End Source File
|
||||
# End Group
|
||||
# Begin Group "sprep & idna"
|
||||
|
||||
# PROP Default_Filter ""
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\idnatest.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\nfsprep.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\nfsprep.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\spreptst.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\sprpdata.c
|
||||
# End Source File
|
||||
# End Group
|
||||
# End Target
|
||||
# End Project
|
||||
|
|
|
@ -1453,6 +1453,7 @@ static void TestComposeDecompose(void) {
|
|||
UChar32 u = 0;
|
||||
UChar comp[NORM_BUFFER_TEST_LEN];
|
||||
uint32_t len = 0;
|
||||
UCollationElements *iter;
|
||||
|
||||
noOfLoc = uloc_countAvailable();
|
||||
|
||||
|
@ -1514,6 +1515,7 @@ static void TestComposeDecompose(void) {
|
|||
ucol_close(coll);
|
||||
|
||||
log_verbose("Testing locales, number of cases = %i\n", noCases);
|
||||
iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
|
||||
for(i = 0; i<noOfLoc; i++) {
|
||||
status = U_ZERO_ERROR;
|
||||
locName = uloc_getAvailable(i);
|
||||
|
@ -1535,6 +1537,12 @@ static void TestComposeDecompose(void) {
|
|||
if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
|
||||
log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
|
||||
doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
|
||||
log_verbose("Testing NFC\n");
|
||||
ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
|
||||
backAndForth(iter);
|
||||
log_verbose("Testing NFD\n");
|
||||
ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
|
||||
backAndForth(iter);
|
||||
}
|
||||
}
|
||||
ucol_close(coll);
|
||||
|
@ -1544,6 +1552,7 @@ static void TestComposeDecompose(void) {
|
|||
free(t[u]);
|
||||
}
|
||||
free(t);
|
||||
ucol_closeElements(iter);
|
||||
}
|
||||
|
||||
static void TestEmptyRule(void) {
|
||||
|
@ -3374,56 +3383,69 @@ static void TestRuleOptions(void) {
|
|||
} tests[] = {
|
||||
/* - all befores here amount to zero */
|
||||
{ "&[before 1][first tertiary ignorable]<<<a",
|
||||
{ "\\u0000", "a"}, 2}, /* you cannot go before first tertiary ignorable */
|
||||
{ "\\u0000", "a"}, 2
|
||||
}, /* you cannot go before first tertiary ignorable */
|
||||
|
||||
{ "&[before 1][last tertiary ignorable]<<<a",
|
||||
{ "\\u0000", "a"}, 2}, /* you cannot go before last tertiary ignorable */
|
||||
{ "\\u0000", "a"}, 2
|
||||
}, /* you cannot go before last tertiary ignorable */
|
||||
|
||||
{ "&[before 1][first secondary ignorable]<<<a",
|
||||
{ "\\u0000", "a"}, 2}, /* you cannot go before first secondary ignorable */
|
||||
{ "\\u0000", "a"}, 2
|
||||
}, /* you cannot go before first secondary ignorable */
|
||||
|
||||
{ "&[before 1][last secondary ignorable]<<<a",
|
||||
{ "\\u0000", "a"}, 2}, /* you cannot go before first secondary ignorable */
|
||||
{ "\\u0000", "a"}, 2
|
||||
}, /* you cannot go before first secondary ignorable */
|
||||
|
||||
/* 'normal' befores */
|
||||
|
||||
{ "&[before 1][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
|
||||
{ "c", "b", "\\u0332", "a" }, 4},
|
||||
{ "c", "b", "\\u0332", "a" }, 4
|
||||
},
|
||||
|
||||
/* we don't have a code point that corresponds to
|
||||
* the last primary ignorable
|
||||
*/
|
||||
{ "&[before 2][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
|
||||
{ "\\u0332", "\\u20e3", "c", "b", "a" }, 5},
|
||||
{ "\\u0332", "\\u20e3", "c", "b", "a" }, 5
|
||||
},
|
||||
|
||||
{ "&[before 1][first variable]<<<c<<<b &[first variable]<a",
|
||||
{ "c", "b", "\\u0009", "a", "\\u000a" }, 5},
|
||||
{ "c", "b", "\\u0009", "a", "\\u000a" }, 5
|
||||
},
|
||||
|
||||
{ "&[last variable]<a &[before 1][last variable]<<<c<<<b ",
|
||||
{ "c", "b", "\\uD800\\uDF23", "a", "\\u02d0" }, 5},
|
||||
{ "c", "b", "\\uD800\\uDF23", "a", "\\u02d0" }, 5
|
||||
},
|
||||
|
||||
{ "&[first regular]<a"
|
||||
"&[before 1][first regular]<b",
|
||||
{ "b", "\\u02d0", "a", "\\u02d1"}, 4},
|
||||
{ "b", "\\u02d0", "a", "\\u02d1"}, 4
|
||||
},
|
||||
|
||||
{ "&[before 1][last regular]<b"
|
||||
"&[last regular]<a",
|
||||
{ "b", "\\uD801\\uDC25", "a", "\\u4e00" }, 4},
|
||||
{ "b", "\\uD801\\uDC25", "a", "\\u4e00" }, 4
|
||||
},
|
||||
|
||||
{ "&[before 1][first implicit]<b"
|
||||
"&[first implicit]<a",
|
||||
{ "b", "\\u4e00", "a", "\\u4e01"}, 4},
|
||||
{ "b", "\\u4e00", "a", "\\u4e01"}, 4
|
||||
},
|
||||
|
||||
{ "&[before 1][last implicit]<b"
|
||||
"&[last implicit]<a",
|
||||
{ "b", "\\U0010FFFC", "a" }, 3},
|
||||
{ "b", "\\U0010FFFC", "a" }, 3
|
||||
},
|
||||
|
||||
{ "&[last variable]<z"
|
||||
"&[last primary ignorable]<x"
|
||||
"&[last secondary ignorable]<<y"
|
||||
"&[last tertiary ignorable]<<<w"
|
||||
"&[top]<u",
|
||||
{"\\ufffb", "w", "y", "\\u20e3", "x", "\\u137c", "z", "u"}, 7 }
|
||||
{"\\ufffb", "w", "y", "\\u20e3", "x", "\\u137c", "z", "u"}, 7
|
||||
}
|
||||
|
||||
};
|
||||
uint32_t i;
|
||||
|
|
|
@ -537,12 +537,14 @@ static void TestNewTypes() {
|
|||
UChar* expectedEscaped = (UChar*)malloc(U_SIZEOF_UCHAR * patternLen);
|
||||
const UChar* got = ures_getStringByKey(theBundle,"test_unescaping",&len,&status);
|
||||
int32_t expectedLen = u_unescape(pattern,expectedEscaped,patternLen);
|
||||
if(u_strncmp(expectedEscaped,got,expectedLen)!=0 || expectedLen != len){
|
||||
if(got==NULL || u_strncmp(expectedEscaped,got,expectedLen)!=0 || expectedLen != len){
|
||||
log_err("genrb failed to unescape string\n");
|
||||
}
|
||||
for(i=0;i<expectedLen;i++){
|
||||
if(expectedEscaped[i] != got[i]){
|
||||
log_verbose("Expected: 0x%04X Got: 0x%04X \n",expectedEscaped[i], got[i]);
|
||||
if(got != NULL){
|
||||
for(i=0;i<expectedLen;i++){
|
||||
if(expectedEscaped[i] != got[i]){
|
||||
log_verbose("Expected: 0x%04X Got: 0x%04X \n",expectedEscaped[i], got[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
free(expectedEscaped);
|
||||
|
|
642
icu4c/source/test/cintltst/idnatest.c
Normal file
642
icu4c/source/test/cintltst/idnatest.c
Normal file
|
@ -0,0 +1,642 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: idnatest.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003jul11
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/uidna.h"
|
||||
#include "cintltst.h"
|
||||
|
||||
|
||||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
#define MAX_DEST_SIZE 1000
|
||||
|
||||
static void TestToUnicode(void);
|
||||
static void TestToASCII(void);
|
||||
static void TestIDNToUnicode(void);
|
||||
static void TestIDNToASCII(void);
|
||||
static void TestCompare(void);
|
||||
|
||||
void addIDNATest(TestNode** root);
|
||||
|
||||
|
||||
typedef int32_t
|
||||
(*TestFunc) ( const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
int32_t options, UParseError *parseError,
|
||||
UErrorCode *status);
|
||||
typedef int32_t
|
||||
(*CompareFunc) (const UChar *s1, int32_t s1Len,
|
||||
const UChar *s2, int32_t s2Len,
|
||||
int32_t options,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
void
|
||||
addIDNATest(TestNode** root)
|
||||
{
|
||||
addTest(root, &TestToUnicode, "idna/TestToUnicode");
|
||||
addTest(root, &TestToASCII, "idna/TestToASCII");
|
||||
addTest(root, &TestIDNToUnicode, "idna/TestIDNToUnicode");
|
||||
addTest(root, &TestIDNToASCII, "idna/TestIDNToASCII");
|
||||
addTest(root, &TestCompare, "idna/TestCompare");
|
||||
}
|
||||
|
||||
static void
|
||||
testAPI(const UChar* src, const UChar* expected, const char* testName,
|
||||
UBool useSTD3ASCIIRules,UErrorCode expectedStatus,
|
||||
UBool doCompare, UBool testUnassigned, TestFunc func){
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UChar destStack[MAX_DEST_SIZE];
|
||||
int32_t destLen = 0;
|
||||
UChar* dest = NULL;
|
||||
int32_t expectedLen = (expected != NULL) ? u_strlen(expected) : 0;
|
||||
int32_t options = (useSTD3ASCIIRules == TRUE) ? UIDNA_USE_STD3_RULES : UIDNA_DEFAULT;
|
||||
UParseError parseError;
|
||||
int32_t tSrcLen = 0;
|
||||
UChar* tSrc = NULL;
|
||||
|
||||
if(src != NULL){
|
||||
tSrcLen = u_strlen(src);
|
||||
tSrc =(UChar*) malloc( U_SIZEOF_UCHAR * tSrcLen );
|
||||
memcpy(tSrc,src,tSrcLen * U_SIZEOF_UCHAR);
|
||||
}
|
||||
|
||||
/* test null-terminated source and return value of number of UChars required */
|
||||
if( expectedStatus != U_IDNA_STD3_ASCII_RULES_ERROR ){
|
||||
destLen = func(src,-1,NULL,0,options, &parseError , &status);
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR){
|
||||
status = U_ZERO_ERROR; /* reset error code */
|
||||
if(destLen+1 < MAX_DEST_SIZE){
|
||||
dest = destStack;
|
||||
destLen = func(src,-1,dest,destLen+1,options, &parseError, &status);
|
||||
/* TODO : compare output with expected */
|
||||
if(U_SUCCESS(status) && expectedStatus != U_IDNA_STD3_ASCII_RULES_ERROR&& (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){
|
||||
log_err("Did not get the expected result for null terminated source.\n" );
|
||||
}
|
||||
}else{
|
||||
log_err( "%s null terminated source failed. Requires destCapacity > 300\n",testName);
|
||||
}
|
||||
}
|
||||
|
||||
if(status != expectedStatus){
|
||||
log_err( "Did not get the expected error for %s null terminated source failed. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status));
|
||||
free(tSrc);
|
||||
return;
|
||||
}
|
||||
if(testUnassigned ){
|
||||
status = U_ZERO_ERROR;
|
||||
destLen = func(src,-1,NULL,0,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status);
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR){
|
||||
status = U_ZERO_ERROR; /* reset error code */
|
||||
if(destLen+1 < MAX_DEST_SIZE){
|
||||
dest = destStack;
|
||||
destLen = func(src,-1,dest,destLen+1,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status);
|
||||
/* TODO : compare output with expected */
|
||||
if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){
|
||||
log_err("Did not get the expected result for %s null terminated source with both options set.\n",testName);
|
||||
|
||||
}
|
||||
}else{
|
||||
log_err( "%s null terminated source failed. Requires destCapacity > 300\n",testName);
|
||||
}
|
||||
}
|
||||
/*testing query string*/
|
||||
if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_ERROR){
|
||||
log_err( "Did not get the expected error for %s null terminated source with options set. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status));
|
||||
}
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
|
||||
/* test source with lengthand return value of number of UChars required*/
|
||||
destLen = func(tSrc, tSrcLen, NULL,0,options, &parseError, &status);
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR){
|
||||
status = U_ZERO_ERROR; /* reset error code */
|
||||
if(destLen+1 < MAX_DEST_SIZE){
|
||||
dest = destStack;
|
||||
destLen = func(src,u_strlen(src),dest,destLen+1,options, &parseError, &status);
|
||||
/* TODO : compare output with expected */
|
||||
if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){
|
||||
log_err("Did not get the expected result for %s with source length.\n",testName);
|
||||
}
|
||||
}else{
|
||||
log_err( "%s with source length failed. Requires destCapacity > 300\n",testName);
|
||||
}
|
||||
}
|
||||
|
||||
if(status != expectedStatus){
|
||||
log_err( "Did not get the expected error for %s with source length. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status));
|
||||
}
|
||||
if(testUnassigned){
|
||||
status = U_ZERO_ERROR;
|
||||
|
||||
destLen = func(tSrc,tSrcLen,NULL,0,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status);
|
||||
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR){
|
||||
status = U_ZERO_ERROR; /* reset error code */
|
||||
if(destLen+1 < MAX_DEST_SIZE){
|
||||
dest = destStack;
|
||||
destLen = func(src,u_strlen(src),dest,destLen+1,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status);
|
||||
/* TODO : compare output with expected */
|
||||
if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){
|
||||
log_err("Did not get the expected result for %s with source length and both options set.\n",testName);
|
||||
}
|
||||
}else{
|
||||
log_err( "%s with source length failed. Requires destCapacity > 300\n",testName);
|
||||
}
|
||||
}
|
||||
/*testing query string*/
|
||||
if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_ERROR){
|
||||
log_err( "Did not get the expected error for %s with source length and options set. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status));
|
||||
}
|
||||
}
|
||||
}else{
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
destLen = func(src,-1,NULL,0,options | UIDNA_USE_STD3_RULES, &parseError, &status);
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR){
|
||||
status = U_ZERO_ERROR; /* reset error code*/
|
||||
if(destLen+1 < MAX_DEST_SIZE){
|
||||
dest = destStack;
|
||||
destLen = func(src,-1,dest,destLen+1,options | UIDNA_USE_STD3_RULES, &parseError, &status);
|
||||
/* TODO : compare output with expected*/
|
||||
if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){
|
||||
log_err("Did not get the expected result for %s null terminated source with both options set.\n",testName);
|
||||
|
||||
}
|
||||
}else{
|
||||
log_err( "%s null terminated source failed. Requires destCapacity > 300\n",testName);
|
||||
}
|
||||
}
|
||||
/*testing query string*/
|
||||
if(status != expectedStatus){
|
||||
log_err( "Did not get the expected error for %s null terminated source with options set. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status));
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
|
||||
destLen = func(tSrc,tSrcLen,NULL,0,options | UIDNA_USE_STD3_RULES, &parseError, &status);
|
||||
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR){
|
||||
status = U_ZERO_ERROR; /* reset error code*/
|
||||
if(destLen+1 < MAX_DEST_SIZE){
|
||||
dest = destStack;
|
||||
destLen = func(src,u_strlen(src),dest,destLen+1,options | UIDNA_USE_STD3_RULES, &parseError, &status);
|
||||
/* TODO : compare output with expected*/
|
||||
if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){
|
||||
log_err("Did not get the expected result for %s with source length and both options set.\n",testName);
|
||||
}
|
||||
}else{
|
||||
log_err( "%s with source length failed. Requires destCapacity > 300\n",testName);
|
||||
}
|
||||
}
|
||||
/*testing query string*/
|
||||
if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_ERROR){
|
||||
log_err( "Did not get the expected error for %s with source length and options set. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status));
|
||||
}
|
||||
}
|
||||
free(tSrc);
|
||||
}
|
||||
|
||||
/*
 * Unicode test labels, one NUL-terminated UTF-16 string per row.
 * Row i is paired by index with asciiIn[i] in TestToASCII and TestToUnicode.
 * The row width of 41 fits the longest entry (row 7: 40 code units plus
 * the terminator).
 */
static UChar unicodeIn[][41] ={
    /* [0] */
    {
        0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643, 0x0644,
        0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A, 0x061F, 0x0000
    },
    /* [1] */
    {
        0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587,
        0x0000
    },
    /* [2] */
    {
        0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073, 0x0074,
        0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076, 0x00ED, 0x010D,
        0x0065, 0x0073, 0x006B, 0x0079, 0x0000
    },
    /* [3] */
    {
        0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5, 0x05D8,
        0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9, 0x05DD, 0x05E2,
        0x05D1, 0x05E8, 0x05D9, 0x05EA, 0x0000
    },
    /* [4] */
    {
        0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928, 0x094D,
        0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902, 0x0928, 0x0939,
        0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938, 0x0915, 0x0924, 0x0947,
        0x0939, 0x0948, 0x0902, 0x0000
    },
    /* [5] */
    {
        0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E, 0x3092,
        0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044, 0x306E, 0x304B,
        0x0000
    },
    /* disabled entry; the asciiIn entry at the same position is disabled too
    {
        0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
        0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
        0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C, 0x0000
    },
    */
    /* [6] */
    {
        0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435, 0x043E,
        0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432, 0x043E, 0x0440,
        0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443, 0x0441, 0x0441, 0x043A,
        0x0438, 0x0000
    },
    /* [7] */
    {
        0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F, 0x0070,
        0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069, 0x006D, 0x0070,
        0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074, 0x0065, 0x0068, 0x0061,
        0x0062, 0x006C, 0x0061, 0x0072, 0x0065, 0x006E, 0x0045, 0x0073, 0x0070,
        0x0061, 0x00F1, 0x006F, 0x006C, 0x0000
    },
    /* [8] */
    {
        0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587,
        0x0000
    },
    /* [9] */
    {
        0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD, 0x006B,
        0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3, 0x0063, 0x0068,
        0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069, 0x1EBF, 0x006E, 0x0067,
        0x0056, 0x0069, 0x1EC7, 0x0074, 0x0000
    },
    /* [10] */
    {
        0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F, 0x0000
    },
    /* [11] */
    {
        0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069, 0x0074,
        0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052, 0x002D, 0x004D,
        0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053, 0x0000
    },
    /* [12] */
    {
        0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E, 0x006F,
        0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061, 0x0079, 0x002D,
        0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834, 0x6240, 0x0000
    },
    /* [13] */
    {
        0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032, 0x0000
    },
    /* [14] */
    {
        0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069, 0x3059,
        0x308B, 0x0035, 0x79D2, 0x524D, 0x0000
    },
    /* [15] */
    {
        0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0,
        0x0000
    },
    /* [16] */
    {
        0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067, 0x0000
    },
    /* [17] test non-BMP code points (surrogate pairs) */
    {
        0xD800, 0xDF00, 0xD800, 0xDF01, 0xD800, 0xDF02, 0xD800, 0xDF03, 0xD800, 0xDF05,
        0xD800, 0xDF06, 0xD800, 0xDF07, 0xD800, 0xDF09, 0xD800, 0xDF0A, 0xD800, 0xDF0B,
        0x0000
    },
    /* [18] more non-BMP code points */
    {
        0xD800, 0xDF0D, 0xD800, 0xDF0C, 0xD800, 0xDF1E, 0xD800, 0xDF0F, 0xD800, 0xDF16,
        0xD800, 0xDF15, 0xD800, 0xDF14, 0xD800, 0xDF12, 0xD800, 0xDF10, 0xD800, 0xDF20,
        0xD800, 0xDF21,
        0x0000
    },
    /* [19] Greek */
    {
        0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac,
        0x0000
    },
    /* [20] Maltese */
    {
        0x0062, 0x006f, 0x006e, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
        0x0127, 0x0061,
        0x0000
    },
    /* [21] Russian */
    {
        0x043f, 0x043e, 0x0447, 0x0435, 0x043c, 0x0443, 0x0436, 0x0435,
        0x043e, 0x043d, 0x0438, 0x043d, 0x0435, 0x0433, 0x043e, 0x0432,
        0x043e, 0x0440, 0x044f, 0x0442, 0x043f, 0x043e, 0x0440, 0x0443,
        0x0441, 0x0441, 0x043a, 0x0438,
        0x0000
    }
};
|
||||
|
||||
/*
 * "xn--" prefixed ASCII labels.  Entry i is paired by index with
 * unicodeIn[i] in TestToASCII and TestToUnicode, so both tables must keep
 * the same number of entries in the same order.
 */
static const char *asciiIn[] = {
    /* [0]  */ "xn--egbpdaj6bu4bxfgehfvwxn",
    /* [1]  */ "xn--ihqwcrb4cv8a8dqg056pqjye",
    /* [2]  */ "xn--Proprostnemluvesky-uyb24dma41a",
    /* [3]  */ "xn--4dbcagdahymbxekheh6e0a7fei0b",
    /* [4]  */ "xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd",
    /* [5]  */ "xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa",
    /* disabled, like the unicodeIn entry at the same position:
       "xn--989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c",*/
    /* [6]  */ "xn--b1abfaaepdrnnbgefbaDotcwatmq2g4l",
    /* [7]  */ "xn--PorqunopuedensimplementehablarenEspaol-fmd56a",
    /* [8]  */ "xn--ihqwctvzc91f659drss3x8bo0yb",
    /* [9]  */ "xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g",
    /* [10] */ "xn--3B-ww4c5e180e575a65lsy2b",
    /* [11] */ "xn---with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n",
    /* [12] */ "xn--Hello-Another-Way--fc4qua05auwb3674vfr0b",
    /* [13] */ "xn--2-u9tlzr9756bt3uc0v",
    /* [14] */ "xn--MajiKoi5-783gue6qz075azm5e",
    /* [15] */ "xn--de-jg4avhby1noc0d",
    /* [16] */ "xn--d9juau41awczczp",
    /* [17] */ "XN--097CCDEKGHQJK",
    /* [18] */ "XN--db8CBHEJLGH4E0AL",
    /* [19] */ "xn--hxargifdar",                       /* Greek */
    /* [20] */ "xn--bonusaa-5bb1da",                   /* Maltese */
    /* [21] */ "xn--b1abfaaepdrnnbgefbadotcwatmq2g4l", /* Russian (Cyrillic)*/
};
|
||||
|
||||
/*
 * Domain names fed to uidna_IDNToUnicode and uidna_IDNToASCII.  Each entry
 * is passed through u_unescape() by the tests before use, so the doubled
 * backslashes below reach u_unescape() as \uXXXX escape sequences.
 */
static const char *domainNames[] = {
    /* plain ASCII host names */
    "slip129-37-118-146.nc.us.ibm.net",
    "saratoga.pe.utexas.edu",
    "dial-120-45.ots.utexas.edu",
    "woo-085.dorms.waller.net",
    "hd30-049.hil.compuserve.com",
    "pem203-31.pe.ttu.edu",
    "56K-227.MaxTNT3.pdq.net",
    "dial-36-2.ots.utexas.edu",
    "slip129-37-23-152.ga.us.ibm.net",
    "ts45ip119.cadvision.com",
    "sdn-ts-004txaustP05.dialsprint.net",
    "bar-tnt1s66.erols.com",
    "101.st-louis-15.mo.dial-access.att.net",
    "h92-245.Arco.COM",
    "dial-13-2.ots.utexas.edu",
    "net-redynet29.datamarkets.com.ar",
    "ccs-shiva28.reacciun.net.ve",
    "7.houston-11.tx.dial-access.att.net",
    "ingw129-37-120-26.mo.us.ibm.net",
    "dialup6.austintx.com",
    "dns2.tpao.gov.tr",
    "slip129-37-119-194.nc.us.ibm.net",
    "cs7.dillons.co.uk.203.119.193.in-addr.arpa",
    "swprd1.innovplace.saskatoon.sk.ca",
    "bikini.bologna.maraut.it",
    "node91.subnet159-198-79.baxter.com",
    "cust19.max5.new-york.ny.ms.uu.net",
    "balexander.slip.andrew.cmu.edu",
    "pool029.max2.denver.co.dynip.alter.net",
    "cust49.max9.new-york.ny.ms.uu.net",
    "s61.abq-dialin2.hollyberry.com",

    /* names with escapes, an "xn--" label, and single-symbol inputs */
    "http://\\u0917\\u0928\\u0947\\u0936.sanjose.ibm.com",
    "www.xn--vea.com",
    "www.\\u00E0\\u00B3\\u00AF.com",
    "www.\\u00C2\\u00A4.com",
    "www.\\u00C2\\u00A3.com",
    "\\u0025",
    "\\u005C\\u005C",
    "@",
    "\\u002F",
    "www.\\u0021.com",
    "www.\\u0024.com",
    "\\u003f",
    /* These yield U_IDNA_PROHIBITED_ERROR
    //"\\u00CF\\u0082.com",
    //"\\u00CE\\u00B2\\u00C3\\u009Fss.com",
    //"\\u00E2\\u0098\\u00BA.com",*/
    "\\u00C3\\u00BC.com",
};
|
||||
|
||||
static void
|
||||
TestToASCII(){
|
||||
|
||||
int32_t i;
|
||||
UChar buf[MAX_DEST_SIZE];
|
||||
const char* testName = "uidna_toASCII";
|
||||
TestFunc func = uidna_toASCII;
|
||||
for(i=0;i< (int32_t)(sizeof(unicodeIn)/sizeof(unicodeIn[0])); i++){
|
||||
u_charsToUChars(asciiIn[i],buf, strlen(asciiIn[i])+1);
|
||||
testAPI(unicodeIn[i], buf,testName, FALSE,U_ZERO_ERROR, TRUE, TRUE, func);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
TestToUnicode(){
|
||||
|
||||
int32_t i;
|
||||
UChar buf[MAX_DEST_SIZE];
|
||||
const char* testName = "uidna_toUnicode";
|
||||
TestFunc func = uidna_toUnicode;
|
||||
for(i=0;i< (int32_t)(sizeof(asciiIn)/sizeof(asciiIn[0])); i++){
|
||||
u_charsToUChars(asciiIn[i],buf, strlen(asciiIn[i])+1);
|
||||
testAPI(buf,unicodeIn[i],testName,FALSE,U_ZERO_ERROR, TRUE, TRUE, func);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
TestIDNToUnicode(){
|
||||
int32_t i;
|
||||
UChar buf[MAX_DEST_SIZE];
|
||||
UChar expected[MAX_DEST_SIZE];
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int32_t bufLen = 0;
|
||||
UParseError parseError;
|
||||
const char* testName="uidna_IDNToUnicode";
|
||||
TestFunc func = uidna_IDNToUnicode;
|
||||
for(i=0;i< (int32_t)(sizeof(domainNames)/sizeof(domainNames[0])); i++){
|
||||
bufLen = strlen(domainNames[i]);
|
||||
bufLen = u_unescape(domainNames[i],buf, bufLen+1);
|
||||
func(buf,bufLen,expected,MAX_DEST_SIZE, UIDNA_ALLOW_UNASSIGNED, &parseError,&status);
|
||||
if(U_FAILURE(status)){
|
||||
log_err( "%s failed to convert domainNames[%i].Error: %s \n",testName, i, u_errorName(status));
|
||||
break;
|
||||
}
|
||||
testAPI(buf,expected,testName,FALSE,U_ZERO_ERROR, TRUE, TRUE, func);
|
||||
/*test toUnicode with all labels in the string*/
|
||||
testAPI(buf,expected,testName, FALSE,U_ZERO_ERROR, TRUE, TRUE, func);
|
||||
if(U_FAILURE(status)){
|
||||
log_err( "%s failed to convert domainNames[%i].Error: %s \n",testName,i, u_errorName(status));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
TestIDNToASCII(){
|
||||
int32_t i;
|
||||
UChar buf[MAX_DEST_SIZE];
|
||||
UChar expected[MAX_DEST_SIZE];
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int32_t bufLen = 0;
|
||||
UParseError parseError;
|
||||
const char* testName="udina_IDNToASCII";
|
||||
TestFunc func=uidna_IDNToASCII;
|
||||
|
||||
for(i=0;i< (int32_t)(sizeof(domainNames)/sizeof(domainNames[0])); i++){
|
||||
bufLen = strlen(domainNames[i]);
|
||||
bufLen = u_unescape(domainNames[i],buf, bufLen+1);
|
||||
func(buf,bufLen,expected,MAX_DEST_SIZE, UIDNA_ALLOW_UNASSIGNED, &parseError,&status);
|
||||
if(U_FAILURE(status)){
|
||||
log_err( "%s failed to convert domainNames[%i].Error: %s \n",testName,i, u_errorName(status));
|
||||
break;
|
||||
}
|
||||
testAPI(buf,expected,testName, FALSE,U_ZERO_ERROR, TRUE, TRUE, func);
|
||||
/*test toASCII with all labels in the string*/
|
||||
testAPI(buf,expected,testName, FALSE,U_ZERO_ERROR, FALSE, TRUE, func);
|
||||
if(U_FAILURE(status)){
|
||||
log_err( "%s failed to convert domainNames[%i].Error: %s \n",testName,i, u_errorName(status));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
testCompareWithSrc(const UChar* s1, int32_t s1Len,
|
||||
const UChar* s2, int32_t s2Len,
|
||||
const char* testName, CompareFunc func,
|
||||
UBool isEqual){
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int32_t retVal = func(s1,-1,s2,-1,UIDNA_DEFAULT,&status);
|
||||
|
||||
if(isEqual==TRUE && retVal !=0){
|
||||
log_err("Did not get the expected result for %s with null termniated strings.\n",testName);
|
||||
}
|
||||
if(U_FAILURE(status)){
|
||||
log_err( "%s null terminated source failed. Error: %s\n", testName,u_errorName(status));
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
retVal = func(s1,-1,s2,-1,UIDNA_ALLOW_UNASSIGNED,&status);
|
||||
|
||||
if(isEqual==TRUE && retVal !=0){
|
||||
log_err("Did not get the expected result for %s with null termniated strings with options set.\n", testName);
|
||||
}
|
||||
if(U_FAILURE(status)){
|
||||
log_err( "%s null terminated source and options set failed. Error: %s\n",testName, u_errorName(status));
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
retVal = func(s1,s1Len,s2,s2Len,UIDNA_DEFAULT,&status);
|
||||
|
||||
if(isEqual==TRUE && retVal !=0){
|
||||
log_err("Did not get the expected result for %s with string length.\n",testName);
|
||||
}
|
||||
if(U_FAILURE(status)){
|
||||
log_err( "%s with string length. Error: %s\n",testName, u_errorName(status));
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
retVal = func(s1,s1Len,s2,s2Len,UIDNA_ALLOW_UNASSIGNED,&status);
|
||||
|
||||
if(isEqual==TRUE && retVal !=0){
|
||||
log_err("Did not get the expected result for %s with string length and options set.\n",testName);
|
||||
}
|
||||
if(U_FAILURE(status)){
|
||||
log_err( "%s with string length and options set. Error: %s\n", u_errorName(status), testName);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Appends the NUL-terminated char string `src` to the NUL-terminated UChar
 * string `dst`, widening each char with a plain (UChar) cast.
 * The terminating NUL is copied as well. Returns `dst`.
 * The caller must provide enough room in `dst`; no bounds are checked.
 */
static UChar*
u_strcatChars(UChar *dst,
              const char *src)
{
    UChar *tail = dst;

    /* advance to the terminator of the existing contents */
    while(*tail != 0) {
        ++tail;
    }

    /* copy unit by unit, stopping after the NUL has been written */
    for(;;) {
        UChar unit = (UChar)*src;
        ++src;
        *tail = unit;
        ++tail;
        if(unit == 0) {
            break;
        }
    }

    return dst;
}
|
||||
static void
|
||||
TestCompare(){
|
||||
int32_t i;
|
||||
|
||||
const char* testName ="uidna_compare";
|
||||
CompareFunc func = uidna_compare;
|
||||
|
||||
UChar www[] = {0x0057, 0x0057, 0x0057, 0x002E, 0x0000};
|
||||
UChar com[] = {0x002E, 0x0043, 0x004F, 0x004D, 0x0000};
|
||||
UChar buf[MAX_DEST_SIZE]={0x0057, 0x0057, 0x0057, 0x002E, 0x0000};
|
||||
UChar source[MAX_DEST_SIZE]={0},
|
||||
uni0[MAX_DEST_SIZE]={0},
|
||||
uni1[MAX_DEST_SIZE]={0},
|
||||
ascii0[MAX_DEST_SIZE]={0},
|
||||
ascii1[MAX_DEST_SIZE]={0};
|
||||
|
||||
|
||||
u_strcat(uni0,unicodeIn[0]);
|
||||
u_strcat(uni0,com);
|
||||
|
||||
u_strcat(uni1,unicodeIn[1]);
|
||||
u_strcat(uni1,com);
|
||||
|
||||
u_strcatChars(ascii0,asciiIn[0]);
|
||||
u_strcat(ascii0,com);
|
||||
|
||||
u_strcatChars(ascii1,asciiIn[1]);
|
||||
u_strcat(ascii1,com);
|
||||
u_strcat(source, buf);
|
||||
for(i=0;i< (int32_t)(sizeof(unicodeIn)/sizeof(unicodeIn[0])); i++){
|
||||
UChar* src;
|
||||
int32_t srcLen;
|
||||
u_charsToUChars(asciiIn[i],buf+4, strlen(asciiIn[i]));
|
||||
u_strcat(buf,com);
|
||||
|
||||
/* for every entry in unicodeIn array
|
||||
prepend www. and append .com*/
|
||||
source[4]=0;
|
||||
u_strcat(source,unicodeIn[i]);
|
||||
u_strcat(source,com);
|
||||
|
||||
/* a) compare it with itself*/
|
||||
src = source;
|
||||
srcLen = u_strlen(src);
|
||||
|
||||
testCompareWithSrc(src,srcLen,src,srcLen,testName, func, TRUE);
|
||||
|
||||
/* b) compare it with asciiIn equivalent */
|
||||
/*testCompareWithSrc(src,srcLen,buf,u_strlen(buf),testName, func,TRUE);*/
|
||||
|
||||
/* c) compare it with unicodeIn not equivalent*/
|
||||
if(i==0){
|
||||
testCompareWithSrc(src,srcLen,uni1,u_strlen(uni1),testName, func,FALSE);
|
||||
}else{
|
||||
testCompareWithSrc(src,srcLen,uni0,u_strlen(uni0),testName, func,FALSE);
|
||||
}
|
||||
/* d) compare it with asciiIn not equivalent */
|
||||
if(i==0){
|
||||
testCompareWithSrc(src,srcLen,ascii1,u_strlen(ascii1),testName, func,FALSE);
|
||||
}else{
|
||||
testCompareWithSrc(src,srcLen,ascii0,u_strlen(ascii0),testName, func,FALSE);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
||||
|
342
icu4c/source/test/cintltst/nfsprep.c
Normal file
342
icu4c/source/test/cintltst/nfsprep.c
Normal file
|
@ -0,0 +1,342 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: nfsprep.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003jul11
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
#include "nfsprep.h"
|
||||
#include "cstring.h"
|
||||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
#define NFS4_MAX_BUFFER_SIZE 1000
|
||||
#define PREFIX_SUFFIX_SEPARATOR 0x0040 /* '@' */
|
||||
|
||||
|
||||
/*
 * Names of the StringPrep data files used by nfs4_prepare().
 * The array is indexed with an NFS4ProfileState value, so the order of the
 * entries must stay in step with that enum.
 */
const char* NFS4DataFileNames[5] ={
    "nfscss",   /* NFS4_CS_PREP_CS        */
    "nfscsi",   /* NFS4_CS_PREP_CI        */
    "nfscis",   /* NFS4_CIS_PREP          */
    "nfsmxp",   /* NFS4_MIXED_PREP_PREFIX */
    "nfsmxs"    /* NFS4_MIXED_PREP_SUFFIX */
};
|
||||
|
||||
|
||||
int32_t
|
||||
nfs4_prepare( const char* src, int32_t srcLength,
|
||||
char* dest, int32_t destCapacity,
|
||||
NFS4ProfileState state,
|
||||
UParseError* parseError,
|
||||
UErrorCode* status){
|
||||
|
||||
UChar b1Stack[NFS4_MAX_BUFFER_SIZE],
|
||||
b2Stack[NFS4_MAX_BUFFER_SIZE];
|
||||
char b3Stack[NFS4_MAX_BUFFER_SIZE];
|
||||
|
||||
//initialize pointers to stack buffers
|
||||
UChar *b1 = b1Stack, *b2 = b2Stack;
|
||||
char *b3=b3Stack;
|
||||
int32_t b1Len=0, b2Len=0, b3Len=0,
|
||||
b1Capacity = NFS4_MAX_BUFFER_SIZE,
|
||||
b2Capacity = NFS4_MAX_BUFFER_SIZE,
|
||||
b3Capacity = NFS4_MAX_BUFFER_SIZE,
|
||||
reqLength=0;
|
||||
|
||||
UStringPrepProfile* profile = NULL;
|
||||
/* get the test data path */
|
||||
const char *testdatapath = NULL;
|
||||
|
||||
if(status==NULL || U_FAILURE(*status)){
|
||||
return 0;
|
||||
}
|
||||
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
testdatapath = loadTestData(status);
|
||||
|
||||
/* convert the string from UTF-8 to UTF-16 */
|
||||
u_strFromUTF8(b1,b1Capacity,&b1Len,src,srcLength,status);
|
||||
if(*status == U_BUFFER_OVERFLOW_ERROR){
|
||||
|
||||
/* reset the status */
|
||||
*status = U_ZERO_ERROR;
|
||||
|
||||
b1 = (UChar*) malloc(b1Len * U_SIZEOF_UCHAR);
|
||||
if(b1==NULL){
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
b1Capacity = b1Len;
|
||||
u_strFromUTF8(b1, b1Capacity, &b1Len, src, srcLength, status);
|
||||
}
|
||||
|
||||
/* open the profile */
|
||||
profile = usprep_open(testdatapath, NFS4DataFileNames[state], status);
|
||||
/* prepare the string */
|
||||
b2Len = usprep_prepare(profile, b1, b1Len, b2, b2Capacity, USPREP_NONE, parseError, status);
|
||||
if(*status == U_BUFFER_OVERFLOW_ERROR){
|
||||
*status = U_ZERO_ERROR;
|
||||
b2 = (UChar*) malloc(b2Len * U_SIZEOF_UCHAR);
|
||||
if(b2== NULL){
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto CLEANUP;
|
||||
}
|
||||
b2Len = usprep_prepare(profile, b1, b1Len, b2, b2Len, USPREP_NONE, parseError, status);
|
||||
}
|
||||
|
||||
/* convert the string back to UTF-8 */
|
||||
u_strToUTF8(b3,b3Capacity, &b3Len, b2, b2Len, status);
|
||||
if(*status == U_BUFFER_OVERFLOW_ERROR){
|
||||
*status = U_ZERO_ERROR;
|
||||
b3 = (char*) malloc(b3Len);
|
||||
if(b3== NULL){
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto CLEANUP;
|
||||
}
|
||||
b3Capacity = b3Len;
|
||||
u_strToUTF8(b3,b3Capacity, &b3Len, b2, b2Len, status);
|
||||
}
|
||||
|
||||
reqLength = b3Len;
|
||||
if(dest!=NULL && reqLength <= destCapacity){
|
||||
memmove(dest, b3, reqLength);
|
||||
}
|
||||
|
||||
CLEANUP:
|
||||
if(b1!=b1Stack){
|
||||
free(b1);
|
||||
}
|
||||
if(b2!=b2Stack){
|
||||
free(b2);
|
||||
}
|
||||
if(b3!=b3Stack){
|
||||
free(b3);
|
||||
}
|
||||
|
||||
return u_terminateChars(dest, destCapacity, reqLength, status);
|
||||
}
|
||||
|
||||
static void
|
||||
syntaxError( const UChar* rules,
|
||||
int32_t pos,
|
||||
int32_t rulesLen,
|
||||
UParseError* parseError){
|
||||
int32_t start, stop;
|
||||
if(parseError == NULL){
|
||||
return;
|
||||
}
|
||||
if(pos == rulesLen && rulesLen >0){
|
||||
pos--;
|
||||
}
|
||||
parseError->offset = pos;
|
||||
parseError->line = 0 ; // we are not using line numbers
|
||||
|
||||
// for pre-context
|
||||
start = (pos <=U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
|
||||
stop = pos;
|
||||
|
||||
u_memcpy(parseError->preContext,rules+start,stop-start);
|
||||
//null terminate the buffer
|
||||
parseError->preContext[stop-start] = 0;
|
||||
|
||||
//for post-context
|
||||
start = pos;
|
||||
if(start<rulesLen) {
|
||||
U16_FWD_1(rules, start, rulesLen);
|
||||
}
|
||||
|
||||
stop = ((pos+U_PARSE_CONTEXT_LEN)<= rulesLen )? (pos+(U_PARSE_CONTEXT_LEN)) :
|
||||
rulesLen;
|
||||
if(start < stop){
|
||||
u_memcpy(parseError->postContext,rules+start,stop-start);
|
||||
//null terminate the buffer
|
||||
parseError->postContext[stop-start]= 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*
 * Special "who" identifiers that must not be followed by a domain suffix.
 * The array is searched with a binary search (findStringIndex), so the
 * entries must stay sorted in strcmp() order.
 * NOTE(review): the identifier list quoted in spreptst.c also names
 * "SERVICE"; it is not part of this table - confirm whether it should be.
 */
static const char* special_prefixes[]={
    "ANONYMOUS",
    "AUTHENTICATED",
    "BATCH",
    "DIALUP",
    "EVERYONE",
    "GROUP",
    "INTERACTIVE",
    "NETWORK",
    "OWNER",
};
|
||||
|
||||
|
||||
/* binary search the sorted array */
|
||||
static int
|
||||
findStringIndex(const char* const *sortedArr, int32_t sortedArrLen, const char* target, int32_t targetLen){
|
||||
|
||||
int left, middle, right,rc;
|
||||
|
||||
left =0;
|
||||
right= sortedArrLen-1;
|
||||
|
||||
while(left <= right){
|
||||
middle = (left+right)/2;
|
||||
rc=strncmp(sortedArr[middle],target, targetLen);
|
||||
|
||||
if(rc<0){
|
||||
left = middle+1;
|
||||
}else if(rc >0){
|
||||
right = middle -1;
|
||||
}else{
|
||||
return middle;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void
|
||||
getPrefixSuffix(const char *src, int32_t srcLength,
|
||||
const char **prefix, int32_t *prefixLen,
|
||||
const char **suffix, int32_t *suffixLen,
|
||||
UErrorCode *status){
|
||||
|
||||
int32_t i=0;
|
||||
*prefix = src;
|
||||
while(i<srcLength){
|
||||
if(src[i] == PREFIX_SUFFIX_SEPARATOR){
|
||||
if((i+1) == srcLength){
|
||||
/* we reached the end of the string */
|
||||
*suffix = NULL;
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
i++;/* the prefix contains the separator */
|
||||
*suffix = src + i;
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
*prefixLen = i;
|
||||
*suffixLen = srcLength - i;
|
||||
/* special prefixes must not be followed by suffixes! */
|
||||
if((findStringIndex(special_prefixes,LENGTHOF(special_prefixes), *prefix, *prefixLen-1) != -1) && (*suffix != NULL)){
|
||||
*status = U_PARSE_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int32_t
|
||||
nfs4_mixed_prepare( const char* src, int32_t srcLength,
|
||||
char* dest, int32_t destCapacity,
|
||||
UParseError* parseError,
|
||||
UErrorCode* status){
|
||||
|
||||
const char *prefix = NULL, *suffix = NULL;
|
||||
int32_t prefixLen=0, suffixLen=0;
|
||||
char pStack[NFS4_MAX_BUFFER_SIZE],
|
||||
sStack[NFS4_MAX_BUFFER_SIZE];
|
||||
char *p=pStack, *s=sStack;
|
||||
int32_t pLen=0, sLen=0, reqLen=0,
|
||||
pCapacity = NFS4_MAX_BUFFER_SIZE,
|
||||
sCapacity = NFS4_MAX_BUFFER_SIZE;
|
||||
|
||||
|
||||
if(status==NULL || U_FAILURE(*status)){
|
||||
return 0;
|
||||
}
|
||||
if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
if(srcLength == -1){
|
||||
srcLength = uprv_strlen(src);
|
||||
}
|
||||
getPrefixSuffix(src, srcLength, &prefix, &prefixLen, &suffix, &suffixLen, status);
|
||||
|
||||
/* prepare the prefix */
|
||||
pLen = nfs4_prepare(prefix, prefixLen, p, pCapacity, NFS4_MIXED_PREP_PREFIX, parseError, status);
|
||||
if(*status == U_BUFFER_OVERFLOW_ERROR){
|
||||
*status = U_ZERO_ERROR;
|
||||
p = (char*) malloc(pLen);
|
||||
if(p == NULL){
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto CLEANUP;
|
||||
}
|
||||
pLen = nfs4_prepare(prefix, prefixLen, p, pLen, NFS4_MIXED_PREP_PREFIX, parseError, status);
|
||||
}
|
||||
|
||||
/* prepare the suffix */
|
||||
if(suffix != NULL){
|
||||
sLen = nfs4_prepare(suffix, suffixLen, s, sCapacity, NFS4_MIXED_PREP_SUFFIX, parseError, status);
|
||||
if(*status == U_BUFFER_OVERFLOW_ERROR){
|
||||
*status = U_ZERO_ERROR;
|
||||
s = (char*) malloc(pLen);
|
||||
if(s == NULL){
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto CLEANUP;
|
||||
}
|
||||
sLen = nfs4_prepare(suffix, suffixLen, s, sLen, NFS4_MIXED_PREP_SUFFIX, parseError, status);
|
||||
}
|
||||
}
|
||||
reqLen = pLen+sLen;
|
||||
if(dest != NULL && reqLen <= destCapacity){
|
||||
memmove(dest, p, pLen);
|
||||
memmove(dest+pLen, s, sLen);
|
||||
}
|
||||
|
||||
CLEANUP:
|
||||
if(p != pStack){
|
||||
free(p);
|
||||
}
|
||||
if(s != sStack){
|
||||
free(s);
|
||||
}
|
||||
|
||||
return u_terminateChars(dest, destCapacity, reqLen, status);
|
||||
}
|
||||
|
||||
int32_t
|
||||
nfs4_cis_prepare( const char* src, int32_t srcLength,
|
||||
char* dest, int32_t destCapacity,
|
||||
UParseError* parseError,
|
||||
UErrorCode* status){
|
||||
return nfs4_prepare(src, srcLength, dest, destCapacity, NFS4_CIS_PREP, parseError, status);
|
||||
}
|
||||
|
||||
|
||||
int32_t
|
||||
nfs4_cs_prepare( const char* src, int32_t srcLength,
|
||||
char* dest, int32_t destCapacity,
|
||||
UBool isCaseSensitive,
|
||||
UParseError* parseError,
|
||||
UErrorCode* status){
|
||||
if(isCaseSensitive){
|
||||
return nfs4_prepare(src, srcLength, dest, destCapacity, NFS4_CS_PREP_CS, parseError, status);
|
||||
}else{
|
||||
return nfs4_prepare(src, srcLength, dest, destCapacity, NFS4_CS_PREP_CI, parseError, status);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
||||
|
113
icu4c/source/test/cintltst/nfsprep.h
Normal file
113
icu4c/source/test/cintltst/nfsprep.h
Normal file
|
@ -0,0 +1,113 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: nfsprep.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003jul11
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
#ifndef _NFSPREP_H
#define _NFSPREP_H

/* utypes.h must come first: it is what makes the UCONFIG_* switches visible,
   so UCONFIG_NO_IDNA cannot be tested reliably before it is included */
#include "unicode/utypes.h"

#if !UCONFIG_NO_IDNA

#include "unicode/ustring.h"
#include "ustr_imp.h"
#include "cintltst.h"
#include "unicode/usprep.h"
#include <stdlib.h>
#include <string.h>


/**
 * Selects the StringPrep profile used by nfs4_prepare().
 * The values are used as indexes into NFS4DataFileNames (nfsprep.c), so the
 * order of the enumerators must match the order of that array.
 */
enum NFS4ProfileState{
    NFS4_CS_PREP_CS,
    NFS4_CS_PREP_CI,
    NFS4_CIS_PREP,
    NFS4_MIXED_PREP_PREFIX,
    NFS4_MIXED_PREP_SUFFIX
};

typedef enum NFS4ProfileState NFS4ProfileState;

/**
 * Prepares the source UTF-8 string for use in file names and
 * returns UTF-8 string on output.
 * @param src          the UTF-8 source string
 * @param srcLength    length of src in bytes, or -1 if it is NUL-terminated
 * @param dest         buffer that receives the prepared UTF-8 string
 * @param destCapacity capacity of dest in bytes
 * @param state        the profile to apply
 * @param parseError   receives parse error information from the preparation
 * @param status       ICU error code
 * @return the length required for the prepared string
 */
int32_t
nfs4_prepare(const char* src, int32_t srcLength,
             char* dest, int32_t destCapacity,
             NFS4ProfileState state,
             UParseError* parseError,
             UErrorCode*  status);

/**
 * Prepares a "prefix@suffix" string: the part up to and including '@' with
 * NFS4_MIXED_PREP_PREFIX and the rest with NFS4_MIXED_PREP_SUFFIX.
 * @param src          the UTF-8 source string
 * @param srcLength    length of src in bytes, or -1 if it is NUL-terminated
 * @param dest         buffer that receives the prepared UTF-8 string
 * @param destCapacity capacity of dest in bytes
 * @param parseError   receives parse error information from the preparation
 * @param status       ICU error code
 * @return the length required for the prepared string
 */
int32_t
nfs4_mixed_prepare( const char* src, int32_t srcLength,
                    char* dest, int32_t destCapacity,
                    UParseError* parseError,
                    UErrorCode*  status);

/**
 * Prepares a string with the NFS4_CIS_PREP profile.
 * @param src          the UTF-8 source string
 * @param srcLength    length of src in bytes, or -1 if it is NUL-terminated
 * @param dest         buffer that receives the prepared UTF-8 string
 * @param destCapacity capacity of dest in bytes
 * @param parseError   receives parse error information from the preparation
 * @param status       ICU error code
 * @return the length required for the prepared string
 */
int32_t
nfs4_cis_prepare(   const char* src, int32_t srcLength,
                    char* dest, int32_t destCapacity,
                    UParseError* parseError,
                    UErrorCode*  status);

/**
 * Prepares a string with NFS4_CS_PREP_CS or NFS4_CS_PREP_CI.
 * @param src             the UTF-8 source string
 * @param srcLength       length of src in bytes, or -1 if it is NUL-terminated
 * @param dest            buffer that receives the prepared UTF-8 string
 * @param destCapacity    capacity of dest in bytes
 * @param isCaseSensitive TRUE selects NFS4_CS_PREP_CS, FALSE NFS4_CS_PREP_CI
 * @param parseError      receives parse error information from the preparation
 * @param status          ICU error code
 * @return the length required for the prepared string
 */
int32_t
nfs4_cs_prepare(    const char* src, int32_t srcLength,
                    char* dest, int32_t destCapacity,
                    UBool isCaseSensitive,
                    UParseError* parseError,
                    UErrorCode*  status);
#endif

#endif
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
450
icu4c/source/test/cintltst/spreptst.c
Normal file
450
icu4c/source/test/cintltst/spreptst.c
Normal file
|
@ -0,0 +1,450 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: spreptst.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003jul11
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/usprep.h"
|
||||
#include "cintltst.h"
|
||||
#include "nfsprep.h"
|
||||
|
||||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
void addUStringPrepTest(TestNode** root);
|
||||
void doStringPrepTest(const char* binFileName, const char* txtFileName,
|
||||
int32_t options, UErrorCode* errorCode);
|
||||
|
||||
static void Test_nfs4_cs_prep_data(void);
|
||||
static void Test_nfs4_cis_prep_data(void);
|
||||
static void Test_nfs4_mixed_prep_data(void);
|
||||
static void Test_nfs4_cs_prep(void);
|
||||
static void Test_nfs4_cis_prep(void);
|
||||
static void Test_nfs4_mixed_prep(void);
|
||||
|
||||
void
|
||||
addUStringPrepTest(TestNode** root)
|
||||
{
|
||||
addTest(root, &Test_nfs4_cs_prep_data, "spreptst/Test_nfs4_cs_prep_data");
|
||||
addTest(root, &Test_nfs4_cis_prep_data, "spreptst/Test_nfs4_cis_prep_data");
|
||||
addTest(root, &Test_nfs4_mixed_prep_data, "spreptst/Test_nfs4_mixed_prep_data");
|
||||
/*addTest(root, &Test_nfs4_cs_prep, "spreptst/Test_nfs4_cs_prep");*/
|
||||
addTest(root, &Test_nfs4_cis_prep, "spreptst/Test_nfs4_cis_prep");
|
||||
addTest(root, &Test_nfs4_mixed_prep, "spreptst/Test_nfs4_mixed_prep");
|
||||
}
|
||||
|
||||
static void
|
||||
Test_nfs4_cs_prep_data(void){
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
log_verbose("Testing nfs4_cs_prep_ci.txt\n");
|
||||
doStringPrepTest("nfscsi","nfs4_cs_prep_ci.txt", USPREP_NONE, &errorCode);
|
||||
|
||||
log_verbose("Testing nfs4_cs_prep_cs.txt\n");
|
||||
errorCode = U_ZERO_ERROR;
|
||||
doStringPrepTest("nfscss","nfs4_cs_prep_cs.txt", USPREP_NONE, &errorCode);
|
||||
|
||||
|
||||
}
|
||||
static void
|
||||
Test_nfs4_cis_prep_data(void){
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
log_verbose("Testing nfs4_cis_prep.txt\n");
|
||||
doStringPrepTest("nfscis","nfs4_cis_prep.txt", USPREP_NONE, &errorCode);
|
||||
}
|
||||
static void
|
||||
Test_nfs4_mixed_prep_data(void){
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
log_verbose("Testing nfs4_mixed_prep_s.txt\n");
|
||||
doStringPrepTest("nfsmxs","nfs4_mixed_prep_s.txt", USPREP_NONE, &errorCode);
|
||||
|
||||
errorCode = U_ZERO_ERROR;
|
||||
log_verbose("Testing nfs4_mixed_prep_p.txt\n");
|
||||
doStringPrepTest("nfsmxp","nfs4_mixed_prep_p.txt", USPREP_NONE, &errorCode);
|
||||
|
||||
}
|
||||
|
||||
static struct ConformanceTestCases
|
||||
{
|
||||
const char *comment;
|
||||
const char *in;
|
||||
const char *out;
|
||||
const char *profile;
|
||||
UErrorCode expectedStatus;
|
||||
}
|
||||
conformanceTestCases[] =
|
||||
{
|
||||
|
||||
{
|
||||
"Case folding ASCII U+0043 U+0041 U+0046 U+0045",
|
||||
"\x43\x41\x46\x45", "\x63\x61\x66\x65",
|
||||
"nfs4_cis_prep",
|
||||
U_ZERO_ERROR
|
||||
|
||||
},
|
||||
{
|
||||
"Case folding 8bit U+00DF (german sharp s)",
|
||||
"\xC3\x9F", "\x73\x73",
|
||||
"nfs4_cis_prep",
|
||||
U_ZERO_ERROR
|
||||
},
|
||||
{
|
||||
"Non-ASCII multibyte space character U+1680",
|
||||
"\xE1\x9A\x80", NULL,
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Non-ASCII 8bit control character U+0085",
|
||||
"\xC2\x85", NULL,
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Non-ASCII multibyte control character U+180E",
|
||||
"\xE1\xA0\x8E", NULL,
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Non-ASCII control character U+1D175",
|
||||
"\xF0\x9D\x85\xB5", NULL,
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Plane 0 private use character U+F123",
|
||||
"\xEF\x84\xA3", NULL,
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Plane 15 private use character U+F1234",
|
||||
"\xF3\xB1\x88\xB4", NULL,
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Plane 16 private use character U+10F234",
|
||||
"\xF4\x8F\x88\xB4", NULL,
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Non-character code point U+8FFFE",
|
||||
"\xF2\x8F\xBF\xBE", NULL,
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Non-character code point U+10FFFF",
|
||||
"\xF4\x8F\xBF\xBF", NULL,
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_PROHIBITED_ERROR
|
||||
},
|
||||
/*
|
||||
{
|
||||
"Surrogate code U+DF42",
|
||||
"\xED\xBD\x82", NULL, "nfs4_cis_prep", UIDNA_DEFAULT,
|
||||
U_STRINGPREP_PROHIBITED_ERROR
|
||||
},
|
||||
*/
|
||||
{
|
||||
"Non-plain text character U+FFFD",
|
||||
"\xEF\xBF\xBD", NULL,
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Ideographic description character U+2FF5",
|
||||
"\xE2\xBF\xB5", NULL,
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Display property character U+0341",
|
||||
"\xCD\x81", "\xCC\x81",
|
||||
"nfs4_cis_prep", U_ZERO_ERROR
|
||||
|
||||
},
|
||||
|
||||
{
|
||||
"Left-to-right mark U+200E",
|
||||
"\xE2\x80\x8E", "\xCC\x81",
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
|
||||
"Deprecated U+202A",
|
||||
"\xE2\x80\xAA", "\xCC\x81",
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Language tagging character U+E0001",
|
||||
"\xF3\xA0\x80\x81", "\xCC\x81",
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Language tagging character U+E0042",
|
||||
"\xF3\xA0\x81\x82", NULL,
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Bidi: RandALCat character U+05BE and LCat characters",
|
||||
"\x66\x6F\x6F\xD6\xBE\x62\x61\x72", NULL,
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_CHECK_BIDI_ERROR
|
||||
},
|
||||
{
|
||||
"Bidi: RandALCat character U+FD50 and LCat characters",
|
||||
"\x66\x6F\x6F\xEF\xB5\x90\x62\x61\x72", NULL,
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_CHECK_BIDI_ERROR
|
||||
},
|
||||
{
|
||||
"Bidi: RandALCat character U+FB38 and LCat characters",
|
||||
"\x66\x6F\x6F\xEF\xB9\xB6\x62\x61\x72", "\x66\x6F\x6F \xd9\x8e\x62\x61\x72",
|
||||
"nfs4_cis_prep",
|
||||
U_ZERO_ERROR
|
||||
},
|
||||
{ "Bidi: RandALCat without trailing RandALCat U+0627 U+0031",
|
||||
"\xD8\xA7\x31", NULL,
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_CHECK_BIDI_ERROR
|
||||
},
|
||||
{
|
||||
"Bidi: RandALCat character U+0627 U+0031 U+0628",
|
||||
"\xD8\xA7\x31\xD8\xA8", "\xD8\xA7\x31\xD8\xA8",
|
||||
"nfs4_cis_prep",
|
||||
U_ZERO_ERROR
|
||||
},
|
||||
{
|
||||
"Unassigned code point U+E0002",
|
||||
"\xF3\xA0\x80\x82", NULL,
|
||||
"nfs4_cis_prep",
|
||||
U_STRINGPREP_UNASSIGNED_ERROR
|
||||
},
|
||||
|
||||
/* // Invalid UTF-8
|
||||
{
|
||||
"Larger test (shrinking)",
|
||||
"X\xC2\xAD\xC3\xDF\xC4\xB0\xE2\x84\xA1\x6a\xcc\x8c\xc2\xa0\xc2"
|
||||
"\xaa\xce\xb0\xe2\x80\x80", "xssi\xcc\x87""tel\xc7\xb0 a\xce\xb0 ",
|
||||
"nfs4_cis_prep",
|
||||
U_ZERO_ERROR
|
||||
},
|
||||
{
|
||||
|
||||
"Larger test (expanding)",
|
||||
"X\xC3\xDF\xe3\x8c\x96\xC4\xB0\xE2\x84\xA1\xE2\x92\x9F\xE3\x8c\x80",
|
||||
"xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\x88"
|
||||
"\xe3\x83\xab""i\xcc\x87""tel\x28""d\x29\xe3\x82\xa2\xe3\x83\x91"
|
||||
"\xe3\x83\xbc\xe3\x83\x88"
|
||||
"nfs4_cis_prep",
|
||||
U_ZERO_ERROR
|
||||
},
|
||||
*/
|
||||
};
|
||||
static void Test_nfs4_cis_prep(void){
|
||||
int32_t i=0;
|
||||
for(i=0;i< (int32_t)(sizeof(conformanceTestCases)/sizeof(conformanceTestCases[0]));i++){
|
||||
const char* src = conformanceTestCases[i].in;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UParseError parseError;
|
||||
UErrorCode expectedStatus = conformanceTestCases[i].expectedStatus;
|
||||
const char* expectedDest = conformanceTestCases[i].out;
|
||||
char* dest = NULL;
|
||||
int32_t destLen = 0;
|
||||
destLen = nfs4_cis_prepare(src , strlen(src), dest, destLen, &parseError, &status);
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR){
|
||||
status = U_ZERO_ERROR;
|
||||
dest = (char*) malloc(++destLen);
|
||||
destLen = nfs4_cis_prepare( src , strlen(src), dest, destLen, &parseError, &status);
|
||||
}
|
||||
if(expectedStatus != status){
|
||||
log_err("Did not get the expected status for nfs4_cis_prep at index %i. Expected: %s Got: %s\n",i, u_errorName(expectedStatus), u_errorName(status));
|
||||
}
|
||||
if(U_SUCCESS(status) && (strcmp(expectedDest,dest) !=0)){
|
||||
log_err("Did not get the expected output for nfs4_cis_prep at index %i.\n", i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
There are several special identifiers ("who") which need to be
|
||||
understood universally, rather than in the context of a particular
|
||||
DNS domain. Some of these identifiers cannot be understood when an
|
||||
NFS client accesses the server, but have meaning when a local process
|
||||
accesses the file. The ability to display and modify these
|
||||
permissions is permitted over NFS, even if none of the access methods
|
||||
on the server understands the identifiers.
|
||||
|
||||
Who Description
|
||||
_______________________________________________________________
|
||||
|
||||
"OWNER" The owner of the file.
|
||||
"GROUP" The group associated with the file.
|
||||
"EVERYONE" The world.
|
||||
"INTERACTIVE" Accessed from an interactive terminal.
|
||||
"NETWORK" Accessed via the network.
|
||||
"DIALUP" Accessed as a dialup user to the server.
|
||||
"BATCH" Accessed from a batch job.
|
||||
"ANONYMOUS" Accessed without any authentication.
|
||||
"AUTHENTICATED" Any authenticated user (opposite of
|
||||
ANONYMOUS)
|
||||
"SERVICE" Access from a system service.
|
||||
|
||||
To avoid conflict, these special identifiers are distinguished by an
|
||||
appended "@" and should appear in the form "xxxx@" (note: no domain
|
||||
name after the "@"). For example: ANONYMOUS@.
|
||||
*/
|
||||
/*
 * Input for Test_nfs4_mixed_prep: the special identifiers followed by
 * "user@domain" style names. Non-ASCII text is written with \u escapes in
 * the string data itself and is converted by unescapeData() before use.
 */
static const char* mixed_prep_data[] ={
    /* special identifiers: nothing follows the '@' */
    "OWNER@",
    "GROUP@",
    "EVERYONE@",
    "INTERACTIVE@",
    "NETWORK@",
    "DIALUP@",
    "BATCH@",
    "ANONYMOUS@",
    "AUTHENTICATED@",
    /* names with a suffix */
    "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D@slip129-37-118-146.nc.us.ibm.net",
    "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d@saratoga.pe.utexas.edu",
    "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e@dial-120-45.ots.utexas.edu",
    "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f@woo-085.dorms.waller.net",
    "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928@hd30-049.hil.compuserve.com",
    "\\u0935\\u093f\\u0937\\u093e\\u0926@pem203-31.pe.ttu.edu",
    "\\u092f\\u094b\\u0917@56K-227.MaxTNT3.pdq.net",
    "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930@dial-36-2.ots.utexas.edu",
    "\\u0909\\u0935\\u093E\\u091A\\u0943@slip129-37-23-152.ga.us.ibm.net",
    "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947@ts45ip119.cadvision.com",
    "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947@sdn-ts-004txaustP05.dialsprint.net",
    "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e@bar-tnt1s66.erols.com",
    "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903@101.st-louis-15.mo.dial-access.att.net",
    "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903@h92-245.Arco.COM",
    "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935@dial-13-2.ots.utexas.edu",
    "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924@net-redynet29.datamarkets.com.ar",
    "\\u0938\\u0902\\u091c\\u0935@ccs-shiva28.reacciun.net.ve",
    "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d@7.houston-11.tx.dial-access.att.net",
    "\\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27@ingw129-37-120-26.mo.us.ibm.net",
    "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d@dialup6.austintx.com",
    "\\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41@dns2.tpao.gov.tr",
    "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d@slip129-37-119-194.nc.us.ibm.net",
    "\\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26@cs7.dillons.co.uk.203.119.193.in-addr.arpa",
    "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d@swprd1.innovplace.saskatoon.sk.ca",
    "\\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26@bikini.bologna.maraut.it",
    "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d@node91.subnet159-198-79.baxter.com",
    "\\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24@cust19.max5.new-york.ny.ms.uu.net",
    "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30@balexander.slip.andrew.cmu.edu",
    "\\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32@pool029.max2.denver.co.dynip.alter.net",
    "\\u0c30\\u0c35\\u0c3f@cust49.max9.new-york.ny.ms.uu.net",
    "\\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d@s61.abq-dialin2.hollyberry.com",
    "\\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27@\\u0917\\u0928\\u0947\\u0936.sanjose.ibm.com",
    "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f@www.\\u00E0\\u00B3\\u00AF.com",
    "\\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32@www.\\u00C2\\u00A4.com",
    "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D@www.\\u00C2\\u00A3.com",
    "\\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f@\\u0025",
    "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d@\\u005C\\u005C",
    "\\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f@www.\\u0021.com",
    "test@www.\\u0024.com",
    "help@\\u00C3\\u00BC.com",

};
|
||||
|
||||
#define MAX_BUFFER_SIZE 1000
|
||||
|
||||
static int32_t
|
||||
unescapeData(const char* src, int32_t srcLen,
|
||||
char* dest, int32_t destCapacity,
|
||||
UErrorCode* status){
|
||||
|
||||
UChar b1Stack[MAX_BUFFER_SIZE];
|
||||
char b2Stack[MAX_BUFFER_SIZE];
|
||||
int32_t b1Capacity = MAX_BUFFER_SIZE,
|
||||
b2Capacity = MAX_BUFFER_SIZE,
|
||||
b1Len = 0,
|
||||
b2Len = 0;
|
||||
|
||||
UChar* b1 = b1Stack;
|
||||
char* b2 = b2Stack;
|
||||
|
||||
b1Len = u_unescape(src,b1,b1Capacity);
|
||||
u_strToUTF8(b2, b2Capacity, &b2Len, b1, b1Len, status);
|
||||
if(U_SUCCESS(*status) && b2Len <= destCapacity){
|
||||
memmove(dest, b2, b2Len);
|
||||
}
|
||||
return b2Len;
|
||||
}
|
||||
static void
|
||||
Test_nfs4_mixed_prep(void){
|
||||
int32_t i=0;
|
||||
char src[MAX_BUFFER_SIZE];
|
||||
int32_t srcLen;
|
||||
|
||||
for(i=0; i< LENGTHOF(mixed_prep_data); i++){
|
||||
int32_t destLen=0;
|
||||
char* dest = NULL;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UParseError parseError;
|
||||
srcLen = unescapeData(mixed_prep_data[i], strlen(mixed_prep_data[i]), src, MAX_BUFFER_SIZE, &status);
|
||||
if(U_FAILURE(status)){
|
||||
log_err("Conversion of data at index %i failed. Error: %s\n", i, u_errorName(status));
|
||||
continue;
|
||||
}
|
||||
destLen = nfs4_mixed_prepare(src, srcLen, NULL, 0, &parseError, &status);
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR){
|
||||
status = U_ZERO_ERROR;
|
||||
dest = (char*)malloc(++destLen);
|
||||
destLen = nfs4_mixed_prepare(src, srcLen, dest, destLen, &parseError, &status);
|
||||
}
|
||||
free(dest);
|
||||
if(U_FAILURE(status)){
|
||||
log_err("Preparation of string at index %i failed. Error: %s\n", i, u_errorName(status));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
/* test the error condition */
|
||||
{
|
||||
const char* src = "OWNER@oss.software.ibm.com";
|
||||
char dest[MAX_BUFFER_SIZE];
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UParseError parseError;
|
||||
int32_t destLen = nfs4_mixed_prepare(src, srcLen, dest, MAX_BUFFER_SIZE, &parseError, &status);
|
||||
if(status != U_PARSE_ERROR){
|
||||
log_err("Did not get the expected error.Expected: %s Got: %s\n", u_errorName(U_PARSE_ERROR), u_errorName(status));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
302
icu4c/source/test/cintltst/sprpdata.c
Normal file
302
icu4c/source/test/cintltst/sprpdata.c
Normal file
|
@ -0,0 +1,302 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*   file name:  sprpdata.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003jul11
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "cintltst.h"
|
||||
#include "unicode/usprep.h"
|
||||
#include "sprpimpl.h"
|
||||
#include "uparse.h"
|
||||
#include "cmemory.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "cstring.h"
|
||||
|
||||
static void
|
||||
parseMappings(const char *filename, UStringPrepProfile* data, UBool reportError, UErrorCode *pErrorCode);
|
||||
|
||||
static void
|
||||
compareMapping(UStringPrepProfile* data, uint32_t codepoint, uint32_t* mapping, int32_t mapLength,
|
||||
UStringPrepType option);
|
||||
|
||||
static void
|
||||
compareFlagsForRange(UStringPrepProfile* data, uint32_t start, uint32_t end,UStringPrepType option);
|
||||
|
||||
|
||||
static void U_CALLCONV
|
||||
strprepProfileLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
uint32_t mapping[40];
|
||||
char *end, *map;
|
||||
uint32_t code;
|
||||
int32_t length;
|
||||
UStringPrepProfile* data = (UStringPrepProfile*) context;
|
||||
const char* typeName;
|
||||
uint32_t rangeStart=0,rangeEnd =0;
|
||||
const char* filename = (const char*) context;
|
||||
|
||||
typeName = fields[2][0];
|
||||
map = fields[1][0];
|
||||
|
||||
if(strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){
|
||||
|
||||
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
|
||||
|
||||
/* store the range */
|
||||
compareFlagsForRange(data, rangeStart,rangeEnd,USPREP_UNASSIGNED);
|
||||
|
||||
}else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){
|
||||
|
||||
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
|
||||
|
||||
/* store the range */
|
||||
compareFlagsForRange(data, rangeStart,rangeEnd,USPREP_PROHIBITED);
|
||||
|
||||
}else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){
|
||||
/* get the character code, field 0 */
|
||||
code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
|
||||
|
||||
/* parse the mapping string */
|
||||
length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode);
|
||||
|
||||
/* compare the mapping */
|
||||
compareMapping(data, code,mapping, length,USPREP_MAP);
|
||||
|
||||
}else if(uprv_strstr(typeName, usprepTypeNames[USPREP_LABEL_SEPARATOR])!=NULL){
|
||||
|
||||
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
|
||||
|
||||
/* compare the range */
|
||||
compareFlagsForRange(data, rangeStart,rangeEnd,USPREP_LABEL_SEPARATOR);
|
||||
}else{
|
||||
*pErrorCode = U_INVALID_FORMAT_ERROR;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void
|
||||
parseMappings(const char *filename, UStringPrepProfile* data, UBool reportError, UErrorCode *pErrorCode) {
|
||||
char *fields[3][2];
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)data, pErrorCode);
|
||||
|
||||
//fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len);
|
||||
|
||||
if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) {
|
||||
log_err( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static UStringPrepType
|
||||
getValues(uint32_t result, int32_t* value, UBool* isIndex){
|
||||
|
||||
UStringPrepType type;
|
||||
if(result == 0){
|
||||
/*
|
||||
* Initial value stored in the mapping table
|
||||
* just return USPREP_TYPE_LIMIT .. so that
|
||||
* the source codepoint is copied to the destination
|
||||
*/
|
||||
type = USPREP_TYPE_LIMIT;
|
||||
}else if(result >= _SPREP_TYPE_THRESHOLD){
|
||||
type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD);
|
||||
}else{
|
||||
/* get the type */
|
||||
type = USPREP_MAP;
|
||||
/* ascertain if the value is index or delta */
|
||||
if(result & 0x02){
|
||||
*isIndex = TRUE;
|
||||
*value = result >> 2;
|
||||
|
||||
}else{
|
||||
*isIndex = FALSE;
|
||||
*value = (int16_t)result;
|
||||
*value = (*value >> 2);
|
||||
|
||||
}
|
||||
if((result>>2) == _SPREP_MAX_INDEX_VALUE){
|
||||
type = USPREP_DELETE;
|
||||
isIndex =FALSE;
|
||||
value = 0;
|
||||
}
|
||||
}
|
||||
return type;
|
||||
}
|
||||
|
||||
static void
|
||||
compareMapping(UStringPrepProfile* data, uint32_t codepoint, uint32_t* mapping,int32_t mapLength,
|
||||
UStringPrepType type){
|
||||
uint32_t result = 0;
|
||||
int32_t length=0;
|
||||
UBool isIndex = FALSE;
|
||||
UStringPrepType retType;
|
||||
int32_t value=0, index=0, delta=0;
|
||||
int32_t* indexes = data->indexes;
|
||||
UTrie trie = data->sprepTrie;
|
||||
const uint16_t* mappingData = data->mappingData;
|
||||
int32_t realLength =0;
|
||||
int32_t j=0;
|
||||
int8_t i=0;
|
||||
|
||||
UTRIE_GET16(&trie, codepoint, result);
|
||||
retType = getValues(result,&value,&isIndex);
|
||||
|
||||
|
||||
if(type != retType && retType != USPREP_DELETE){
|
||||
|
||||
log_err( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, USPREP_MAP, type);
|
||||
|
||||
}
|
||||
|
||||
if(isIndex){
|
||||
index = value;
|
||||
if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
|
||||
index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
|
||||
length = 1;
|
||||
}else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
|
||||
index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
|
||||
length = 2;
|
||||
}else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
|
||||
index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
|
||||
length = 3;
|
||||
}else{
|
||||
length = mappingData[index++];
|
||||
}
|
||||
}else{
|
||||
delta = value;
|
||||
length = (retType == USPREP_DELETE)? 0 : 1;
|
||||
}
|
||||
|
||||
/* figure out the real length */
|
||||
for(j=0; j<mapLength; j++){
|
||||
if(mapping[j] > 0xFFFF){
|
||||
realLength +=2;
|
||||
}else{
|
||||
realLength++;
|
||||
}
|
||||
}
|
||||
|
||||
if(realLength != length){
|
||||
log_err( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length);
|
||||
}
|
||||
|
||||
if(isIndex){
|
||||
for(i =0; i< mapLength; i++){
|
||||
if(mapping[i] <= 0xFFFF){
|
||||
if(mappingData[index+i] != (uint16_t)mapping[i]){
|
||||
log_err("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]);
|
||||
}
|
||||
}else{
|
||||
UChar lead = UTF16_LEAD(mapping[i]);
|
||||
UChar trail = UTF16_TRAIL(mapping[i]);
|
||||
if(mappingData[index+i] != lead ||
|
||||
mappingData[index+i+1] != trail){
|
||||
log_err( "Did not get the expected result. Expected: 0x%04X 0x%04X Got: 0x%04X 0x%04X\n", lead, trail, mappingData[index+i], mappingData[index+i+1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}else{
|
||||
if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){
|
||||
log_err("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
compareFlagsForRange(UStringPrepProfile* data,
|
||||
uint32_t start, uint32_t end,
|
||||
UStringPrepType type){
|
||||
|
||||
uint32_t result =0 ;
|
||||
UStringPrepType retType;
|
||||
UBool isIndex=FALSE;
|
||||
int32_t value=0;
|
||||
UTrie trie = data->sprepTrie;
|
||||
/*
|
||||
// supplementary code point
|
||||
UChar __lead16=UTF16_LEAD(0x2323E);
|
||||
int32_t __offset;
|
||||
|
||||
// get data for lead surrogate
|
||||
(result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16));
|
||||
__offset=(&idnTrie)->getFoldingOffset(result);
|
||||
|
||||
// get the real data from the folded lead/trail units
|
||||
if(__offset>0) {
|
||||
(result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff);
|
||||
} else {
|
||||
(result)=(uint32_t)((&idnTrie)->initialValue);
|
||||
}
|
||||
|
||||
UTRIE_GET16(&idnTrie,0x2323E, result);
|
||||
*/
|
||||
while(start < end+1){
|
||||
UTRIE_GET16(&trie,start, result);
|
||||
retType = getValues(result, &value, &isIndex);
|
||||
if(result > _SPREP_TYPE_THRESHOLD){
|
||||
if(retType != type){
|
||||
log_err( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
|
||||
}
|
||||
}else{
|
||||
if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){
|
||||
log_err( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
|
||||
}
|
||||
}
|
||||
|
||||
start++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
doStringPrepTest(const char* binFileName, const char* txtFileName, int32_t options, UErrorCode* errorCode){
|
||||
|
||||
const char *testdatapath = loadTestData(errorCode);
|
||||
const char *srcdatapath =ctest_dataOutDir();
|
||||
char *filename = (char*) malloc(2 * uprv_strlen(srcdatapath) );
|
||||
const char *relativepath = ".."U_FILE_SEP_STRING".."U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING;
|
||||
|
||||
UStringPrepProfile* profile = usprep_open(testdatapath, binFileName, errorCode);
|
||||
|
||||
if(U_FAILURE(*errorCode)){
|
||||
log_err("Failed to load %s data file. Error: %s \n", binFileName, u_errorName(*errorCode));
|
||||
return;
|
||||
}
|
||||
/* open and load the txt file */
|
||||
uprv_strcpy(filename,srcdatapath);
|
||||
uprv_strcat(filename,relativepath);
|
||||
uprv_strcat(filename,txtFileName);
|
||||
|
||||
parseMappings(filename,profile, TRUE,errorCode);
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
|
@ -273,7 +273,9 @@ testTrieRangesWithMalloc(const char *testName,
|
|||
storage = (uint8_t*) uprv_malloc(sizeof(uint8_t)*100000);
|
||||
|
||||
log_verbose("\ntesting Trie '%s'\n", testName);
|
||||
newTrie=utrie_open(NULL, NULL, 2000, checkRanges[0].value, latin1Linear);
|
||||
newTrie=utrie_open(NULL, NULL, 2000,
|
||||
checkRanges[0].value, checkRanges[0].value,
|
||||
latin1Linear);
|
||||
|
||||
/* set values from setRanges[] */
|
||||
ok=TRUE;
|
||||
|
@ -457,7 +459,9 @@ testTrieRanges(const char *testName,
|
|||
UBool overwrite, ok;
|
||||
|
||||
log_verbose("\ntesting Trie '%s'\n", testName);
|
||||
newTrie=utrie_open(NULL, NULL, 2000, checkRanges[0].value, latin1Linear);
|
||||
newTrie=utrie_open(NULL, NULL, 2000,
|
||||
checkRanges[0].value, checkRanges[0].value,
|
||||
latin1Linear);
|
||||
|
||||
/* set values from setRanges[] */
|
||||
ok=TRUE;
|
||||
|
|
|
@ -15,7 +15,7 @@ U_NAMESPACE_USE
|
|||
ContractionTableTest::ContractionTableTest() {
|
||||
status = U_ZERO_ERROR;
|
||||
/*testMapping = ucmpe32_open(0, 0, 0, &status);*/
|
||||
testMapping = utrie_open(NULL, NULL, 0, 0, TRUE);
|
||||
testMapping = utrie_open(NULL, NULL, 0, 0, 0, TRUE);
|
||||
}
|
||||
|
||||
ContractionTableTest::~ContractionTableTest() {
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
|
||||
#if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION
|
||||
#include "idnaref.h"
|
||||
#include "strprep.h"
|
||||
#include "punyref.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "cmemory.h"
|
||||
|
|
|
@ -162,7 +162,7 @@ int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength,
|
|||
for(;bufIndex<bufLen;){
|
||||
U16_NEXT(buffer, bufIndex, bufLen, ch);
|
||||
if(unassigned.contains(ch)){
|
||||
status = U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR;
|
||||
status = U_IDNA_UNASSIGNED_ERROR;
|
||||
rsource.releaseBuffer();
|
||||
return 0;
|
||||
}
|
||||
|
@ -231,7 +231,7 @@ int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength,
|
|||
U16_NEXT(b1, b1Index, b1Len, ch);
|
||||
|
||||
if(prohibited.contains(ch) && ch!=0x0020){
|
||||
status = U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR;
|
||||
status = U_IDNA_PROHIBITED_ERROR;
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
|
|
|
@ -22,7 +22,6 @@
|
|||
#if !UCONFIG_NO_IDNA
|
||||
#if !UCONFIG_NO_TRANSLITERATION
|
||||
|
||||
#include "strprep.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/translit.h"
|
||||
|
|
|
@ -37,39 +37,22 @@
|
|||
#include "umutex.h"
|
||||
#include "sprpimpl.h"
|
||||
#include "testidna.h"
|
||||
#include "punyref.h"
|
||||
|
||||
UBool beVerbose=FALSE, haveCopyright=TRUE;
|
||||
|
||||
/* prototypes --------------------------------------------------------------- */
|
||||
|
||||
|
||||
static UBool isDataLoaded = FALSE;
|
||||
static UTrie idnTrie={ 0,0,0,0,0,0,0 };
|
||||
static UDataMemory *idnData=NULL;
|
||||
static UErrorCode dataErrorCode =U_ZERO_ERROR;
|
||||
|
||||
|
||||
static const uint16_t* mappingData = NULL;
|
||||
static int32_t indexes[_IDNA_INDEX_TOP]={ 0 };
|
||||
|
||||
|
||||
static void
|
||||
parseMappings(const char *filename, UBool withNorm, UBool reportError,TestIDNA& test, UErrorCode *pErrorCode);
|
||||
|
||||
static void
|
||||
parseTable(const char *filename, UBool isUnassigned, TestIDNA& test, UErrorCode *pErrorCode);
|
||||
|
||||
static UBool loadIDNData(UErrorCode &errorCode);
|
||||
|
||||
static UBool cleanup();
|
||||
parseMappings(const char *filename, UBool reportError,TestIDNA& test, UErrorCode *pErrorCode);
|
||||
|
||||
static void
|
||||
compareMapping(uint32_t codepoint, uint32_t* mapping, int32_t mapLength,
|
||||
UBool withNorm);
|
||||
UStringPrepType option);
|
||||
|
||||
static void
|
||||
compareFlagsForRange(uint32_t start, uint32_t end,
|
||||
UBool isUnassigned);
|
||||
compareFlagsForRange(uint32_t start, uint32_t end,UStringPrepType option);
|
||||
|
||||
static void
|
||||
testAllCodepoints(TestIDNA& test);
|
||||
|
@ -77,12 +60,12 @@ testAllCodepoints(TestIDNA& test);
|
|||
static TestIDNA* pTestIDNA =NULL;
|
||||
|
||||
static const char* fileNames[] = {
|
||||
"rfc3454_A_1.txt", /* contains unassigned code points */
|
||||
"rfc3454_C_X.txt", /* contains code points that are prohibited */
|
||||
"rfc3454_B_1.txt", /* contains case mappings when normalization is turned off */
|
||||
"rfc3454_B_2.txt", /* contains case mappings when normalization it turned on */
|
||||
/* "NormalizationCorrections.txt",contains NFKC case mappings whicha are not included in UTR 21 */
|
||||
};
|
||||
"NamePrepProfile.txt"
|
||||
};
|
||||
static UStringPrepProfile *profile = NULL;
|
||||
static const UTrie *idnTrie = NULL;
|
||||
static const int32_t *indexes = NULL;
|
||||
static const uint16_t *mappingData = NULL;
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* file definitions */
|
||||
|
@ -100,12 +83,16 @@ testData(TestIDNA& test) {
|
|||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
char *saveBasename =NULL;
|
||||
|
||||
loadIDNData(errorCode);
|
||||
if(U_FAILURE(dataErrorCode)){
|
||||
test.errln( "Could not load data. Error: %s\n",u_errorName(dataErrorCode));
|
||||
return dataErrorCode;
|
||||
profile = usprep_open(NULL, DATA_NAME, &errorCode);
|
||||
if(U_FAILURE(errorCode)){
|
||||
test.errln("Failed to load IDNA data file. " + UnicodeString(u_errorName(errorCode)));
|
||||
return errorCode;
|
||||
}
|
||||
|
||||
idnTrie = &profile->sprepTrie;
|
||||
indexes = profile->indexes;
|
||||
mappingData = profile->mappingData;
|
||||
|
||||
//initialize
|
||||
pTestIDNA = &test;
|
||||
|
||||
|
@ -136,28 +123,7 @@ testData(TestIDNA& test) {
|
|||
|
||||
/* process unassigned */
|
||||
uprv_strcpy(basename,fileNames[0]);
|
||||
parseTable(filename,TRUE, test,&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
test.errln( "Could not open file %s for reading \n", filename);
|
||||
return errorCode;
|
||||
}
|
||||
/* process prohibited */
|
||||
uprv_strcpy(basename,fileNames[1]);
|
||||
parseTable(filename,FALSE, test, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
test.errln( "Could not open file %s for reading \n", filename);
|
||||
return errorCode;
|
||||
}
|
||||
|
||||
/* process mappings */
|
||||
uprv_strcpy(basename,fileNames[2]);
|
||||
parseMappings(filename, FALSE, FALSE,test, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
test.errln( "Could not open file %s for reading \n", filename);
|
||||
return errorCode;
|
||||
}
|
||||
uprv_strcpy(basename,fileNames[3]);
|
||||
parseMappings(filename, TRUE, FALSE,test, &errorCode);
|
||||
parseMappings(filename,TRUE, test,&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
test.errln( "Could not open file %s for reading \n", filename);
|
||||
return errorCode;
|
||||
|
@ -165,48 +131,76 @@ testData(TestIDNA& test) {
|
|||
|
||||
testAllCodepoints(test);
|
||||
|
||||
cleanup();
|
||||
usprep_close(profile);
|
||||
pTestIDNA = NULL;
|
||||
free(filename);
|
||||
return errorCode;
|
||||
}
|
||||
U_CDECL_BEGIN
|
||||
|
||||
static void U_CALLCONV
|
||||
caseMapLineFn(void *context,
|
||||
char *fields[][2], int32_t /*fieldCount*/,
|
||||
strprepProfileLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
uint32_t mapping[40];
|
||||
char *end, *s;
|
||||
char *end, *map;
|
||||
uint32_t code;
|
||||
int32_t length;
|
||||
UBool* mapWithNorm = (UBool*) context;
|
||||
/*UBool* mapWithNorm = (UBool*) context;*/
|
||||
const char* typeName;
|
||||
uint32_t rangeStart=0,rangeEnd =0;
|
||||
const char* filename = (const char*) context;
|
||||
|
||||
typeName = fields[2][0];
|
||||
map = fields[1][0];
|
||||
|
||||
if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){
|
||||
|
||||
/* get the character code, field 0 */
|
||||
code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
|
||||
if(end<=fields[0][0] || end!=fields[0][1]) {
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
|
||||
|
||||
/* store the range */
|
||||
compareFlagsForRange(rangeStart,rangeEnd,USPREP_UNASSIGNED);
|
||||
|
||||
}else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){
|
||||
|
||||
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
|
||||
|
||||
/* store the range */
|
||||
compareFlagsForRange(rangeStart,rangeEnd,USPREP_PROHIBITED);
|
||||
|
||||
}else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){
|
||||
/* get the character code, field 0 */
|
||||
code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
|
||||
|
||||
/* parse the mapping string */
|
||||
length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode);
|
||||
|
||||
/* store the mapping */
|
||||
compareMapping(code,mapping, length,USPREP_MAP);
|
||||
|
||||
}else if(uprv_strstr(typeName, usprepTypeNames[USPREP_LABEL_SEPARATOR])!=NULL){
|
||||
|
||||
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
|
||||
|
||||
/* store the range */
|
||||
compareFlagsForRange(rangeStart,rangeEnd,USPREP_LABEL_SEPARATOR);
|
||||
}else{
|
||||
*pErrorCode = U_INVALID_FORMAT_ERROR;
|
||||
}
|
||||
|
||||
s = fields[1][0];
|
||||
/* parse the mapping string */
|
||||
length=u_parseCodePoints(s, mapping, sizeof(mapping)/4, pErrorCode);
|
||||
|
||||
/* store the mapping */
|
||||
|
||||
compareMapping(code,mapping, length, *mapWithNorm);
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
static void
|
||||
parseMappings(const char *filename,UBool withNorm, UBool reportError, TestIDNA& test, UErrorCode *pErrorCode) {
|
||||
parseMappings(const char *filename,UBool reportError, TestIDNA& test, UErrorCode *pErrorCode) {
|
||||
char *fields[3][2];
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
u_parseDelimitedFile(filename, ';', fields, 3, caseMapLineFn, &withNorm, pErrorCode);
|
||||
u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode);
|
||||
|
||||
//fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len);
|
||||
|
||||
|
@ -215,142 +209,167 @@ parseMappings(const char *filename,UBool withNorm, UBool reportError, TestIDNA&
|
|||
}
|
||||
}
|
||||
|
||||
/* parser for UnicodeData.txt ----------------------------------------------- */
|
||||
U_CDECL_BEGIN
|
||||
|
||||
static void U_CALLCONV
|
||||
unicodeDataLineFn(void *context,
|
||||
char *fields[][2], int32_t /*fieldCount*/,
|
||||
UErrorCode *pErrorCode) {
|
||||
uint32_t rangeStart=0,rangeEnd =0;
|
||||
UBool* isUnassigned = (UBool*) context;
|
||||
static inline UStringPrepType
|
||||
getValues(uint32_t result, int32_t& value, UBool& isIndex){
|
||||
|
||||
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
|
||||
|
||||
if(U_FAILURE(*pErrorCode)){
|
||||
*pErrorCode = U_PARSE_ERROR;
|
||||
return;
|
||||
UStringPrepType type;
|
||||
|
||||
if(result == 0){
|
||||
/*
|
||||
* Initial value stored in the mapping table
|
||||
* just return USPREP_TYPE_LIMIT .. so that
|
||||
* the source codepoint is copied to the destination
|
||||
*/
|
||||
type = USPREP_TYPE_LIMIT;
|
||||
}else if(result >= _SPREP_TYPE_THRESHOLD){
|
||||
type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD);
|
||||
}else{
|
||||
/* get the state */
|
||||
type = USPREP_MAP;
|
||||
/* ascertain if the value is index or delta */
|
||||
if(result & 0x02){
|
||||
isIndex = TRUE;
|
||||
value = result >> 2; //mask off the lower 2 bits and shift
|
||||
|
||||
}else{
|
||||
isIndex = FALSE;
|
||||
value = (int16_t)result;
|
||||
value = (value >> 2);
|
||||
|
||||
}
|
||||
if((result>>2) == _SPREP_MAX_INDEX_VALUE){
|
||||
type = USPREP_DELETE;
|
||||
isIndex =FALSE;
|
||||
value = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
compareFlagsForRange(rangeStart,rangeEnd,*isUnassigned);
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
static void
|
||||
parseTable(const char *filename,UBool isUnassigned,TestIDNA& test, UErrorCode *pErrorCode) {
|
||||
char *fields[2][2];
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
u_parseDelimitedFile(filename, ';', fields, 1, unicodeDataLineFn, &isUnassigned, pErrorCode);
|
||||
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
test.errln( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
testAllCodepoints(TestIDNA& test){
|
||||
if(isDataLoaded){
|
||||
uint32_t i = 0;
|
||||
int32_t unassigned = 0;
|
||||
int32_t prohibited = 0;
|
||||
int32_t mappedWithNorm = 0;
|
||||
int32_t mapped = 0;
|
||||
int32_t noValueInTrie = 0;
|
||||
/*
|
||||
{
|
||||
UChar str[19] = {
|
||||
0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
|
||||
0x070F,//prohibited
|
||||
0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74
|
||||
};
|
||||
uint32_t in[19] = {0};
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int32_t inLength=0, outLength=100;
|
||||
char output[100] = {0};
|
||||
punycode_status error;
|
||||
u_strToUTF32((UChar32*)in,19,&inLength,str,19,&status);
|
||||
|
||||
|
||||
for(i=0;i<=0x10FFFF;i++){
|
||||
uint32_t result = 0;
|
||||
UTRIE_GET16(&idnTrie,i, result);
|
||||
|
||||
if(result != UIDNA_NO_VALUE ){
|
||||
if((result & 0x07) == UIDNA_UNASSIGNED){
|
||||
unassigned++;
|
||||
}
|
||||
if((result & 0x07) == UIDNA_PROHIBITED){
|
||||
prohibited++;
|
||||
}
|
||||
if((result>>5) == _IDNA_MAP_TO_NOTHING){
|
||||
mapped++;
|
||||
}
|
||||
if((result & 0x07) == UIDNA_MAP_NFKC){
|
||||
mappedWithNorm++;
|
||||
}
|
||||
}else{
|
||||
noValueInTrie++;
|
||||
if(result > 0){
|
||||
test.errln("The return value for 0x%06X is wrong. %i\n",i,result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test.logln("Number of Unassinged code points : %i \n",unassigned);
|
||||
test.logln("Number of Prohibited code points : %i \n",prohibited);
|
||||
test.logln("Number of Mapped code points : %i \n",mapped);
|
||||
test.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm);
|
||||
test.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie);
|
||||
error= punycode_encode(inLength, in, NULL, (uint32_t*)&outLength, output);
|
||||
printf(output);
|
||||
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
static inline void getValues(uint32_t result, int8_t& flag,
|
||||
int8_t& length, int32_t& index){
|
||||
/* first 3 bits contain the flag */
|
||||
flag = (int8_t) (result & 0x07);
|
||||
/* next 2 bits contain the length */
|
||||
length = (int8_t) ((result>>3) & 0x03);
|
||||
/* next 11 bits contain the index */
|
||||
index = (result>> 5);
|
||||
uint32_t i = 0;
|
||||
int32_t unassigned = 0;
|
||||
int32_t prohibited = 0;
|
||||
int32_t mappedWithNorm = 0;
|
||||
int32_t mapped = 0;
|
||||
int32_t noValueInTrie = 0;
|
||||
|
||||
UStringPrepType type;
|
||||
int32_t value;
|
||||
UBool isIndex = FALSE;
|
||||
|
||||
for(i=0;i<=0x10FFFF;i++){
|
||||
uint32_t result = 0;
|
||||
UTRIE_GET16(idnTrie,i, result);
|
||||
type = getValues(result,value, isIndex);
|
||||
if(type != USPREP_TYPE_LIMIT ){
|
||||
if(type == USPREP_UNASSIGNED){
|
||||
unassigned++;
|
||||
}
|
||||
if(type == USPREP_PROHIBITED){
|
||||
prohibited++;
|
||||
}
|
||||
if(type == USPREP_MAP){
|
||||
mapped++;
|
||||
}
|
||||
if(type == USPREP_LABEL_SEPARATOR){
|
||||
mappedWithNorm++;
|
||||
}
|
||||
}else{
|
||||
noValueInTrie++;
|
||||
if(result > 0){
|
||||
test.errln("The return value for 0x%06X is wrong. %i\n",i,result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test.logln("Number of Unassinged code points : %i \n",unassigned);
|
||||
test.logln("Number of Prohibited code points : %i \n",prohibited);
|
||||
test.logln("Number of Mapped code points : %i \n",mapped);
|
||||
test.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm);
|
||||
test.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie);
|
||||
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength,
|
||||
UBool withNorm){
|
||||
if(isDataLoaded){
|
||||
uint32_t result = 0;
|
||||
UTRIE_GET16(&idnTrie,codepoint, result);
|
||||
UStringPrepType type){
|
||||
uint32_t result = 0;
|
||||
UTRIE_GET16(idnTrie,codepoint, result);
|
||||
|
||||
int8_t flag, length;
|
||||
int32_t index;
|
||||
getValues(result,flag,length, index);
|
||||
int32_t length=0;
|
||||
UBool isIndex;
|
||||
UStringPrepType retType;
|
||||
int32_t value, index=0, delta=0;
|
||||
|
||||
retType = getValues(result,value,isIndex);
|
||||
|
||||
|
||||
if(withNorm){
|
||||
if(flag != UIDNA_MAP_NFKC){
|
||||
pTestIDNA->errln( "Did not get the assigned flag for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, UIDNA_MAP_NFKC, flag);
|
||||
}
|
||||
if(type != retType && retType != USPREP_DELETE){
|
||||
|
||||
pTestIDNA->errln( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, USPREP_MAP, type);
|
||||
|
||||
}
|
||||
|
||||
if(isIndex){
|
||||
index = value;
|
||||
if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
|
||||
index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
|
||||
length = 1;
|
||||
}else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
|
||||
index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
|
||||
length = 2;
|
||||
}else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
|
||||
index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
|
||||
length = 3;
|
||||
}else{
|
||||
if(flag==UIDNA_NO_VALUE || flag == UIDNA_PROHIBITED){
|
||||
if(index != _IDNA_MAP_TO_NOTHING ){
|
||||
pTestIDNA->errln( "Did not get the assigned flag for codepoint 0x%08X. Expected: %i Got: %i\n", codepoint, _IDNA_MAP_TO_NOTHING, index);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(length ==_IDNA_LENGTH_IN_MAPPING_TABLE){
|
||||
length = (int8_t)mappingData[index];
|
||||
index++;
|
||||
}
|
||||
int32_t realLength =0;
|
||||
/* figure out the real length */
|
||||
for(int32_t j=0; j<mapLength; j++){
|
||||
if(mapping[j] > 0xFFFF){
|
||||
realLength +=2;
|
||||
}else{
|
||||
realLength++;
|
||||
}
|
||||
length = mappingData[index++];
|
||||
}
|
||||
}else{
|
||||
delta = value;
|
||||
length = (retType == USPREP_DELETE)? 0 : 1;
|
||||
}
|
||||
|
||||
if(realLength != length){
|
||||
pTestIDNA->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length);
|
||||
}
|
||||
|
||||
int32_t realLength =0;
|
||||
/* figure out the real length */
|
||||
for(int32_t j=0; j<mapLength; j++){
|
||||
if(mapping[j] > 0xFFFF){
|
||||
realLength +=2;
|
||||
}else{
|
||||
realLength++;
|
||||
}
|
||||
}
|
||||
|
||||
if(realLength != length){
|
||||
pTestIDNA->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length);
|
||||
}
|
||||
|
||||
if(isIndex){
|
||||
for(int8_t i =0; i< mapLength; i++){
|
||||
if(mapping[i] <= 0xFFFF){
|
||||
if(mappingData[index+i] != (uint16_t)mapping[i]){
|
||||
|
@ -365,132 +384,58 @@ compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
}else{
|
||||
if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){
|
||||
pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
compareFlagsForRange(uint32_t start, uint32_t end,
|
||||
UBool isUnassigned){
|
||||
if(isDataLoaded){
|
||||
uint32_t result =0 ;
|
||||
while(start < end+1){
|
||||
UTRIE_GET16(&idnTrie,start, result);
|
||||
if(isUnassigned){
|
||||
if(result != UIDNA_UNASSIGNED){
|
||||
pTestIDNA->errln( "UIDNA_UASSIGNED flag failed for 0x%06X. Expected: %04X Got: %04X\n",start,UIDNA_UNASSIGNED, result);
|
||||
}
|
||||
}else{
|
||||
if((result & 0x03) != UIDNA_PROHIBITED){
|
||||
pTestIDNA->errln( "UIDNA_PROHIBITED flag failed for 0x%06X. Expected: %04X Got: %04X\n\n",start,UIDNA_PROHIBITED, result);
|
||||
}
|
||||
UStringPrepType type){
|
||||
|
||||
uint32_t result =0 ;
|
||||
UStringPrepType retType;
|
||||
UBool isIndex=FALSE;
|
||||
int32_t value=0;
|
||||
/*
|
||||
// supplementary code point
|
||||
UChar __lead16=UTF16_LEAD(0x2323E);
|
||||
int32_t __offset;
|
||||
|
||||
// get data for lead surrogate
|
||||
(result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16));
|
||||
__offset=(&idnTrie)->getFoldingOffset(result);
|
||||
|
||||
// get the real data from the folded lead/trail units
|
||||
if(__offset>0) {
|
||||
(result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff);
|
||||
} else {
|
||||
(result)=(uint32_t)((&idnTrie)->initialValue);
|
||||
}
|
||||
|
||||
UTRIE_GET16(&idnTrie,0x2323E, result);
|
||||
*/
|
||||
while(start < end+1){
|
||||
UTRIE_GET16(idnTrie,start, result);
|
||||
retType = getValues(result,value,isIndex);
|
||||
if(result > _SPREP_TYPE_THRESHOLD){
|
||||
if(retType != type){
|
||||
pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
|
||||
}
|
||||
}else{
|
||||
if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){
|
||||
pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
|
||||
}
|
||||
start++;
|
||||
}
|
||||
|
||||
start++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
 * Releases the loaded IDNA data, if any, and resets the load state:
 * idnData is closed and set to NULL, dataErrorCode goes back to
 * U_ZERO_ERROR and isDataLoaded to FALSE.
 *
 * @return always TRUE
 */
UBool
cleanup() {
    if(NULL != idnData) {
        udata_close(idnData);
        idnData = NULL;
    }

    /* forget any earlier load result so that the data can be loaded again */
    isDataLoaded = FALSE;
    dataErrorCode = U_ZERO_ERROR;

    return TRUE;
}
|
||||
U_CDECL_BEGIN
|
||||
/*
 * Acceptance callback passed to udata_openChoice() by loadIDNData().
 * Only the UDataInfo header is inspected; context, type and name are ignored.
 *
 * @param pInfo header of the candidate data item
 * @return TRUE if the header is at least 20 bytes, matches this platform's
 *         endianness and charset family, carries the data format "IDNA",
 *         format version 2, and was built with the same UTRIE_SHIFT and
 *         UTRIE_INDEX_SHIFT; FALSE otherwise
 */
static UBool U_CALLCONV
isAcceptable(void * /* context */,
             const char * /* type */, const char * /* name */,
             const UDataInfo *pInfo) {
    /* the checks run in the same order as a single && chain would */
    if(pInfo->size<20) {
        return FALSE;
    }
    if(pInfo->isBigEndian!=U_IS_BIG_ENDIAN || pInfo->charsetFamily!=U_CHARSET_FAMILY) {
        return FALSE;
    }
    /* dataFormat="IDNA" 0x49, 0x44, 0x4e, 0x41 */
    if(pInfo->dataFormat[0]!=0x49 ||
       pInfo->dataFormat[1]!=0x44 ||
       pInfo->dataFormat[2]!=0x4e ||
       pInfo->dataFormat[3]!=0x41) {
        return FALSE;
    }
    if(pInfo->formatVersion[0]!=2) {
        return FALSE;
    }
    /* the trie geometry is recorded in formatVersion[2] and formatVersion[3] */
    if(pInfo->formatVersion[2]!=UTRIE_SHIFT || pInfo->formatVersion[3]!=UTRIE_INDEX_SHIFT) {
        return FALSE;
    }
    return TRUE;
}
|
||||
|
||||
/* idnTrie: the folding offset is the lead FCD value itself */
|
||||
static int32_t U_CALLCONV
|
||||
getFoldingOffset(uint32_t data) {
|
||||
if(data&0x8000) {
|
||||
return (int32_t)(data&0x7fff);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
U_CDECL_END
|
||||
|
||||
/*
 * Loads the IDNA data item on first use and publishes it through the
 * file-level state: idnData, indexes, idnTrie, mappingData and isDataLoaded.
 * When isDataLoaded is already TRUE nothing is done.
 *
 * @param errorCode in/out ICU error code. If it already indicates a failure
 *                  while the data is not yet loaded, FALSE is returned
 *                  without attempting a load. Failures from opening the data
 *                  or unserializing the trie are also stored in dataErrorCode.
 * @return TRUE if the data is loaded, FALSE otherwise
 */
static UBool
loadIDNData(UErrorCode &errorCode) {
    /* load the IDNA data from file */
    if(isDataLoaded==FALSE) {
        UTrie _idnTrie={ 0,0,0,0,0,0,0 };
        UDataMemory *data;
        const int32_t *p=NULL;
        const uint8_t *pb;

        /*
         * errorCode is a reference, so its address can never be NULL;
         * only the incoming error value needs to be checked.
         */
        if(U_FAILURE(errorCode)) {
            return FALSE;
        }

        /* open the data outside the mutex block */
        data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errorCode);
        dataErrorCode=errorCode;
        if(U_FAILURE(errorCode)) {
            return isDataLoaded=FALSE;
        }

        /* the item starts with the int32_t index array, followed by the serialized trie */
        p=(const int32_t *)udata_getMemory(data);
        pb=(const uint8_t *)(p+_IDNA_INDEX_TOP);
        utrie_unserialize(&_idnTrie, pb, p[_IDNA_INDEX_TRIE_SIZE], &errorCode);
        _idnTrie.getFoldingOffset=getFoldingOffset;

        if(U_FAILURE(errorCode)) {
            dataErrorCode=errorCode;
            udata_close(data);
            return isDataLoaded=FALSE;
        }

        /* in the mutex block, set the data for this process */
        umtx_lock(NULL);
        if(idnData==NULL) {
            idnData=data;
            data=NULL;
            uprv_memcpy(&indexes, p, sizeof(indexes));
            uprv_memcpy(&idnTrie, &_idnTrie, sizeof(UTrie));
        } else {
            /* somebody else published first: work from the published copy */
            p=(const int32_t *)udata_getMemory(idnData);
        }
        umtx_unlock(NULL);

        /* the mapping data follows the trie, whose byte size is stored in the indexes */
        mappingData=(uint16_t *)((uint8_t *)(p+_IDNA_INDEX_TOP)+indexes[_IDNA_INDEX_TRIE_SIZE]);

        isDataLoaded = TRUE;

        /* if a different thread set it first, then close the extra data */
        if(data!=NULL) {
            udata_close(data); /* NULL if it was set correctly */
        }
    }

    return isDataLoaded;
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_IDNA */
|
||||
|
||||
|
|
|
@ -221,7 +221,7 @@ static const char *domainNames[] = {
|
|||
"www.\\u0021.com",
|
||||
"www.\\u0024.com",
|
||||
"\\u003f",
|
||||
// These yield U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
|
||||
// These yield U_IDNA_PROHIBITED_ERROR
|
||||
//"\\u00CF\\u0082.com",
|
||||
//"\\u00CE\\u00B2\\u00C3\\u009Fss.com",
|
||||
//"\\u00E2\\u0098\\u00BA.com",
|
||||
|
@ -245,13 +245,13 @@ static struct ErrorCases{
|
|||
{
|
||||
0x0077, 0x0077, 0x0077, 0x002e, /* www. */
|
||||
0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
|
||||
0x2060,/*prohibited*/
|
||||
0x070F,/*prohibited*/
|
||||
0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
|
||||
0x002e, 0x0063, 0x006f, 0x006d, /* com. */
|
||||
0x0000
|
||||
},
|
||||
"www.XN--fxG2146CsoA28OruCyA378BqrE2tCwOp06C5qBw82A1rFfmAE0361DeA96B.com",
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR,
|
||||
"www.XN--8mb5595fsoa28orucya378bqre2tcwop06c5qbw82a1rffmae0361dea96b.com",
|
||||
U_IDNA_PROHIBITED_ERROR,
|
||||
FALSE, TRUE, TRUE
|
||||
},
|
||||
|
||||
|
@ -265,7 +265,7 @@ static struct ErrorCases{
|
|||
},
|
||||
"www.XN--6lA2Bz548Fj1GuA391Bf1Gb1N59Ab29A7iA.com",
|
||||
|
||||
U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR,
|
||||
U_IDNA_UNASSIGNED_ERROR,
|
||||
FALSE, TRUE, TRUE
|
||||
},
|
||||
{
|
||||
|
@ -349,7 +349,7 @@ static struct ErrorCases{
|
|||
0x0000
|
||||
},
|
||||
"www.XN--ghbgi278xia.com",
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR,
|
||||
U_IDNA_PROHIBITED_ERROR,
|
||||
FALSE, TRUE, TRUE
|
||||
},
|
||||
{
|
||||
|
@ -423,78 +423,78 @@ static struct ConformanceTestCases
|
|||
"Non-ASCII multibyte space character U+1680",
|
||||
"\xE1\x9A\x80", NULL,
|
||||
"Nameprep", UIDNA_DEFAULT,
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
|
||||
U_IDNA_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Non-ASCII 8bit control character U+0085",
|
||||
"\xC2\x85", NULL,
|
||||
"Nameprep", UIDNA_DEFAULT,
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
|
||||
U_IDNA_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Non-ASCII multibyte control character U+180E",
|
||||
"\xE1\xA0\x8E", NULL,
|
||||
"Nameprep", UIDNA_DEFAULT,
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
|
||||
U_IDNA_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Non-ASCII control character U+1D175",
|
||||
"\xF0\x9D\x85\xB5", NULL,
|
||||
"Nameprep", UIDNA_DEFAULT,
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
|
||||
U_IDNA_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Plane 0 private use character U+F123",
|
||||
"\xEF\x84\xA3", NULL,
|
||||
"Nameprep", UIDNA_DEFAULT,
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
|
||||
U_IDNA_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Plane 15 private use character U+F1234",
|
||||
"\xF3\xB1\x88\xB4", NULL,
|
||||
"Nameprep", UIDNA_DEFAULT,
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
|
||||
U_IDNA_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Plane 16 private use character U+10F234",
|
||||
"\xF4\x8F\x88\xB4", NULL,
|
||||
"Nameprep", UIDNA_DEFAULT,
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
|
||||
U_IDNA_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Non-character code point U+8FFFE",
|
||||
"\xF2\x8F\xBF\xBE", NULL,
|
||||
"Nameprep", UIDNA_DEFAULT,
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
|
||||
U_IDNA_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Non-character code point U+10FFFF",
|
||||
"\xF4\x8F\xBF\xBF", NULL,
|
||||
"Nameprep", UIDNA_DEFAULT,
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
|
||||
U_IDNA_PROHIBITED_ERROR
|
||||
},
|
||||
/*
|
||||
{
|
||||
"Surrogate code U+DF42",
|
||||
"\xED\xBD\x82", NULL, "Nameprep", UIDNA_DEFAULT,
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
|
||||
U_IDNA_PROHIBITED_ERROR
|
||||
},
|
||||
*/
|
||||
{
|
||||
"Non-plain text character U+FFFD",
|
||||
"\xEF\xBF\xBD", NULL,
|
||||
"Nameprep", UIDNA_DEFAULT,
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
|
||||
U_IDNA_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Ideographic description character U+2FF5",
|
||||
"\xE2\xBF\xB5", NULL,
|
||||
"Nameprep", UIDNA_DEFAULT,
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
|
||||
U_IDNA_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Display property character U+0341",
|
||||
"\xCD\x81", "\xCD\x81",
|
||||
"\xCD\x81", "\xCC\x81",
|
||||
"Nameprep", UIDNA_DEFAULT, U_ZERO_ERROR
|
||||
|
||||
},
|
||||
|
@ -503,26 +503,26 @@ static struct ConformanceTestCases
|
|||
"Left-to-right mark U+200E",
|
||||
"\xE2\x80\x8E", "\xCC\x81",
|
||||
"Nameprep", UIDNA_DEFAULT,
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
|
||||
U_IDNA_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
|
||||
"Deprecated U+202A",
|
||||
"\xE2\x80\xAA", "\xCC\x81",
|
||||
"Nameprep", UIDNA_DEFAULT,
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
|
||||
U_IDNA_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Language tagging character U+E0001",
|
||||
"\xF3\xA0\x80\x81", "\xCC\x81",
|
||||
"Nameprep", UIDNA_DEFAULT,
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
|
||||
U_IDNA_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Language tagging character U+E0042",
|
||||
"\xF3\xA0\x81\x82", NULL,
|
||||
"Nameprep", UIDNA_DEFAULT,
|
||||
U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR
|
||||
U_IDNA_PROHIBITED_ERROR
|
||||
},
|
||||
{
|
||||
"Bidi: RandALCat character U+05BE and LCat characters",
|
||||
|
@ -557,7 +557,7 @@ static struct ConformanceTestCases
|
|||
"Unassigned code point U+E0002",
|
||||
"\xF3\xA0\x80\x82", NULL,
|
||||
"Nameprep", UIDNA_DEFAULT,
|
||||
U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR
|
||||
U_IDNA_UNASSIGNED_ERROR
|
||||
},
|
||||
|
||||
/* // Invalid UTF-8
|
||||
|
@ -585,7 +585,39 @@ static struct ConformanceTestCases
|
|||
|
||||
#define MAX_DEST_SIZE 300
|
||||
|
||||
void TestIDNA::debug(const UChar* src, int32_t srcLength, int32_t options){
|
||||
UParseError parseError;
|
||||
UErrorCode transStatus = U_ZERO_ERROR;
|
||||
UErrorCode prepStatus = U_ZERO_ERROR;
|
||||
NamePrepTransform* trans = NamePrepTransform::createInstance(parseError,transStatus);
|
||||
int32_t prepOptions = (((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0);
|
||||
UStringPrepProfile* prep = usprep_open(NULL,"uidna",&prepStatus);
|
||||
UChar *transOut=NULL, *prepOut=NULL;
|
||||
int32_t transOutLength=0, prepOutLength=0;
|
||||
|
||||
|
||||
transOutLength = trans->process(src,srcLength,transOut, 0, prepOptions>0, &parseError, transStatus);
|
||||
if( transStatus == U_BUFFER_OVERFLOW_ERROR){
|
||||
transStatus = U_ZERO_ERROR;
|
||||
transOut = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * transOutLength);
|
||||
transOutLength = trans->process(src,srcLength,transOut, transOutLength, prepOptions>0, &parseError, transStatus);
|
||||
}
|
||||
|
||||
prepOutLength = usprep_prepare(prep, src, srcLength, prepOut, 0, prepOptions, &parseError, &prepStatus);
|
||||
|
||||
if( prepStatus == U_BUFFER_OVERFLOW_ERROR){
|
||||
prepStatus = U_ZERO_ERROR;
|
||||
prepOut = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * prepOutLength);
|
||||
prepOutLength = usprep_prepare(prep, src, srcLength, prepOut, prepOutLength, prepOptions, &parseError, &prepStatus);
|
||||
}
|
||||
|
||||
if(UnicodeString(transOut,transOutLength)!= UnicodeString(prepOut, prepOutLength)){
|
||||
errln("Failed. Expected: " + prettify(UnicodeString(transOut, transOutLength))
|
||||
+ " Got: " + prettify(UnicodeString(prepOut,prepOutLength)));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* testName,
|
||||
UBool useSTD3ASCIIRules,UErrorCode expectedStatus,
|
||||
|
@ -609,7 +641,7 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test
|
|||
|
||||
// test null-terminated source and return value of number of UChars required
|
||||
if( expectedStatus != U_IDNA_STD3_ASCII_RULES_ERROR ){
|
||||
destLen = func(src,-1,dest,0,options, &parseError , &status);
|
||||
destLen = func(src,-1,NULL,0,options, &parseError , &status);
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR){
|
||||
status = U_ZERO_ERROR; // reset error code
|
||||
if(destLen+1 < MAX_DEST_SIZE){
|
||||
|
@ -634,7 +666,7 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test
|
|||
}
|
||||
if(testUnassigned ){
|
||||
status = U_ZERO_ERROR;
|
||||
destLen = func(src,-1,dest,0,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status);
|
||||
destLen = func(src,-1,NULL,0,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status);
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR){
|
||||
status = U_ZERO_ERROR; // reset error code
|
||||
if(destLen+1 < MAX_DEST_SIZE){
|
||||
|
@ -643,7 +675,12 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test
|
|||
// TODO : compare output with expected
|
||||
if(U_SUCCESS(status) && (doCompare==TRUE) && u_strCaseCompare(dest,destLen, expected,expectedLen,0,&status)!=0){
|
||||
//errln("Did not get the expected result for %s null terminated source with both options set.\n",testName);
|
||||
errln("Did not get the expected result for "+UnicodeString(testName) +" null terminated source with both options set. Expected: "+ prettify(UnicodeString(expected,expectedLen)));
|
||||
errln("Did not get the expected result for "+UnicodeString(testName) +
|
||||
" null terminated source "+ prettify(src) +
|
||||
" with both options set. Expected: "+ prettify(UnicodeString(expected,expectedLen))+
|
||||
"Got: " + prettify(UnicodeString(dest,destLen)));
|
||||
|
||||
debug(src,-1,options | UIDNA_ALLOW_UNASSIGNED);
|
||||
|
||||
}
|
||||
}else{
|
||||
|
@ -651,7 +688,7 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test
|
|||
}
|
||||
}
|
||||
//testing query string
|
||||
if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR){
|
||||
if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_ERROR){
|
||||
errln( "Did not get the expected error for %s null terminated source with options set. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status));
|
||||
}
|
||||
}
|
||||
|
@ -659,7 +696,7 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test
|
|||
status = U_ZERO_ERROR;
|
||||
|
||||
// test source with length and return value of number of UChars required
|
||||
destLen = func(tSrc, tSrcLen, dest,0,options, &parseError, &status);
|
||||
destLen = func(tSrc, tSrcLen, NULL,0,options, &parseError, &status);
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR){
|
||||
status = U_ZERO_ERROR; // reset error code
|
||||
if(destLen+1 < MAX_DEST_SIZE){
|
||||
|
@ -680,7 +717,7 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test
|
|||
if(testUnassigned){
|
||||
status = U_ZERO_ERROR;
|
||||
|
||||
destLen = func(tSrc,tSrcLen,dest,0,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status);
|
||||
destLen = func(tSrc,tSrcLen,NULL,0,options | UIDNA_ALLOW_UNASSIGNED, &parseError, &status);
|
||||
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR){
|
||||
status = U_ZERO_ERROR; // reset error code
|
||||
|
@ -696,14 +733,14 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test
|
|||
}
|
||||
}
|
||||
//testing query string
|
||||
if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR){
|
||||
if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_ERROR){
|
||||
errln( "Did not get the expected error for %s with source length and options set. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status));
|
||||
}
|
||||
}
|
||||
}else{
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
destLen = func(src,-1,dest,0,options | UIDNA_USE_STD3_RULES, &parseError, &status);
|
||||
destLen = func(src,-1,NULL,0,options | UIDNA_USE_STD3_RULES, &parseError, &status);
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR){
|
||||
status = U_ZERO_ERROR; // reset error code
|
||||
if(destLen+1 < MAX_DEST_SIZE){
|
||||
|
@ -726,7 +763,7 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test
|
|||
|
||||
status = U_ZERO_ERROR;
|
||||
|
||||
destLen = func(tSrc,tSrcLen,dest,0,options | UIDNA_USE_STD3_RULES, &parseError, &status);
|
||||
destLen = func(tSrc,tSrcLen,NULL,0,options | UIDNA_USE_STD3_RULES, &parseError, &status);
|
||||
|
||||
if(status == U_BUFFER_OVERFLOW_ERROR){
|
||||
status = U_ZERO_ERROR; // reset error code
|
||||
|
@ -742,7 +779,7 @@ void TestIDNA::testAPI(const UChar* src, const UChar* expected, const char* test
|
|||
}
|
||||
}
|
||||
//testing query string
|
||||
if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR){
|
||||
if(status != expectedStatus && expectedStatus != U_IDNA_UNASSIGNED_ERROR){
|
||||
errln( "Did not get the expected error for %s with source length and options set. Expected: %s Got: %s\n",testName, u_errorName(expectedStatus), u_errorName(status));
|
||||
}
|
||||
}
|
||||
|
@ -1078,13 +1115,13 @@ void TestIDNA::testConformance(const char* toASCIIName, TestFunc toASCII,
|
|||
IDNToASCIIName, FALSE,
|
||||
conformanceTestCases[i].expectedStatus,
|
||||
TRUE,
|
||||
(conformanceTestCases[i].expectedStatus != U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR),
|
||||
(conformanceTestCases[i].expectedStatus != U_IDNA_UNASSIGNED_ERROR),
|
||||
IDNToASCII);
|
||||
|
||||
testAPI(src,expected,
|
||||
toASCIIName, FALSE,
|
||||
conformanceTestCases[i].expectedStatus, TRUE,
|
||||
(conformanceTestCases[i].expectedStatus != U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR),
|
||||
(conformanceTestCases[i].expectedStatus != U_IDNA_UNASSIGNED_ERROR),
|
||||
toASCII);
|
||||
}
|
||||
|
||||
|
@ -1474,11 +1511,15 @@ void TestIDNA::testCompareReferenceImpl(const UChar* src, int32_t srcLen){
|
|||
asciiLen = idnaref_toASCII(labelUChars, label.length()-1,ascii,asciiCapacity,
|
||||
UIDNA_DEFAULT,&parseError,&expectedStatus);
|
||||
|
||||
if(expectedStatus == U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR){
|
||||
if(expectedStatus == U_IDNA_UNASSIGNED_ERROR){
|
||||
expectedStatus = U_ZERO_ERROR;
|
||||
asciiLen = idnaref_toASCII(labelUChars, label.length()-1,ascii,asciiCapacity,
|
||||
UIDNA_ALLOW_UNASSIGNED,&parseError,&expectedStatus);
|
||||
expectedStatus = U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR;
|
||||
if(expectedStatus==U_BUFFER_OVERFLOW_ERROR){
|
||||
errln("idnaref_toASCII failed. Error:" + UnicodeString(u_errorName(expectedStatus)));
|
||||
return;
|
||||
}
|
||||
expectedStatus = U_IDNA_UNASSIGNED_ERROR;
|
||||
}
|
||||
|
||||
testAPI(labelUChars,ascii, "uidna_toASCII",FALSE,
|
||||
|
@ -1488,11 +1529,15 @@ void TestIDNA::testCompareReferenceImpl(const UChar* src, int32_t srcLen){
|
|||
expectedStatus = U_ZERO_ERROR;
|
||||
uniLen = idnaref_toUnicode(ascii, asciiLen, uni,uniCapacity,UIDNA_DEFAULT,
|
||||
&parseError,&expectedStatus);
|
||||
if(expectedStatus == U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR){
|
||||
if(expectedStatus == U_IDNA_UNASSIGNED_ERROR){
|
||||
expectedStatus = U_ZERO_ERROR;
|
||||
uniLen = idnaref_toUnicode(ascii, asciiLen, uni,uniCapacity,UIDNA_DEFAULT,
|
||||
&parseError,&expectedStatus);
|
||||
expectedStatus = U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR;
|
||||
if(expectedStatus==U_BUFFER_OVERFLOW_ERROR){
|
||||
errln("idnaref_toASCII failed. Error:" + UnicodeString(u_errorName(expectedStatus)));
|
||||
return;
|
||||
}
|
||||
expectedStatus = U_IDNA_UNASSIGNED_ERROR;
|
||||
}
|
||||
testAPI(ascii,uni,"uidna_toUnicode",FALSE,expectedStatus,TRUE, FALSE, uidna_toUnicode);
|
||||
}
|
||||
|
@ -1504,7 +1549,7 @@ void TestIDNA::TestIDNAMonkeyTest(){
|
|||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
getInstance(status); // Init prep
|
||||
|
||||
/*
|
||||
for(int i=0; i<loopCount; i++){
|
||||
source.truncate(0);
|
||||
getTestSource(source);
|
||||
|
@ -1512,14 +1557,20 @@ void TestIDNA::TestIDNAMonkeyTest(){
|
|||
testCompareReferenceImpl(source.getBuffer(),source.length()-1);
|
||||
source.releaseBuffer();
|
||||
}
|
||||
/* for debugging
|
||||
source.append("\\U000E5BC8\\U00025112\\U00016846\\U0001B375\\U0002EDE4"
|
||||
"\\U00016E18\\U00010B84\\U000E1639\\U0001C3BE\\u336B\\u5F66"
|
||||
"\\u2AA6\\uD817\\u0000");
|
||||
source = source.unescape();
|
||||
testCompareReferenceImpl(source.getBuffer(),source.length()-1);
|
||||
source.releaseBuffer();
|
||||
*/
|
||||
/* for debugging */
|
||||
source.append( "\\u2109\\u3E1B\\U000E65CA\\U0001CAC5" );
|
||||
|
||||
source = source.unescape();
|
||||
//testCompareReferenceImpl(source.getBuffer(),source.length());
|
||||
debug(source.getBuffer(),source.length(),UIDNA_ALLOW_UNASSIGNED);
|
||||
source.releaseBuffer();
|
||||
|
||||
|
||||
source.truncate(0);
|
||||
source.append("\\uCF18\\U00021161\\U000EEF11\\U0002BB82\\U0001D63C");
|
||||
debug(source.getBuffer(),source.length(),UIDNA_ALLOW_UNASSIGNED);
|
||||
source.releaseBuffer();
|
||||
|
||||
delete TestIDNA::prep;
|
||||
TestIDNA::prep = NULL;
|
||||
|
|
|
@ -74,7 +74,7 @@ private:
|
|||
void testCompare(const char* testName, CompareFunc func);
|
||||
void testChaining(const char* toASCIIName, TestFunc toASCII,
|
||||
const char* toUnicodeName, TestFunc toUnicode);
|
||||
|
||||
void debug(const UChar* src, int32_t srcLength, int32_t options);
|
||||
// main testing functions
|
||||
void testAPI(const UChar *src, const UChar *expected, const char *testName,
|
||||
UBool useSTD3ASCIIRules, UErrorCode expectedStatus,
|
||||
|
|
8
icu4c/source/tools/gensprep/.cvsignore
Normal file
8
icu4c/source/tools/gensprep/.cvsignore
Normal file
|
@ -0,0 +1,8 @@
|
|||
*.d
|
||||
*.pdb
|
||||
Debug
|
||||
Makefile
|
||||
Release
|
||||
gensprep
|
||||
gensprep.8
|
||||
gensprep.plg
|
102
icu4c/source/tools/gensprep/Makefile.in
Normal file
102
icu4c/source/tools/gensprep/Makefile.in
Normal file
|
@ -0,0 +1,102 @@
|
|||
## Makefile.in for ICU - tools/gensprep
|
||||
## Copyright (c) 2001-2003, International Business Machines Corporation and
|
||||
## others. All Rights Reserved.
|
||||
## Steven R. Loomis/Markus W. Scherer
|
||||
|
||||
## Source directory information
|
||||
srcdir = @srcdir@
|
||||
top_srcdir = @top_srcdir@
|
||||
|
||||
top_builddir = ../..
|
||||
|
||||
include $(top_builddir)/icudefs.mk
|
||||
|
||||
##
|
||||
|
||||
SECTION = 8
|
||||
|
||||
MAN_FILES = $(TARGET:$(EXEEXT)=).$(SECTION)
|
||||
|
||||
## Build directory information
|
||||
subdir = tools/gensprep
|
||||
|
||||
ICUDATADIR=$(top_builddir)/data
|
||||
UNICODEDATADIR=$(top_srcdir)/../data/unidata
|
||||
|
||||
## Extra files to remove for 'make clean'
|
||||
CLEANFILES = *~ $(DEPS) $(RES_FILES) $(TEST_FILES) $(MAN_FILES)
|
||||
|
||||
## Target information
|
||||
TARGET = gensprep$(EXEEXT)
|
||||
|
||||
CPPFLAGS += -I$(top_builddir)/common -I$(top_srcdir)/common -I$(srcdir)/../toolutil
|
||||
LIBS = $(LIBICUTOOLUTIL) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
|
||||
|
||||
OBJECTS = gensprep.o store.o
|
||||
|
||||
DEPS = $(OBJECTS:.o=.d)
|
||||
|
||||
## List of phony targets
|
||||
.PHONY : all all-local install install-local clean clean-local \
|
||||
distclean distclean-local dist dist-local check \
|
||||
check-local build-data install-man
|
||||
|
||||
## Clear suffix list
|
||||
.SUFFIXES :
|
||||
|
||||
## List of standard targets
|
||||
all: all-local
|
||||
install: install-local
|
||||
clean: clean-local
|
||||
distclean : distclean-local
|
||||
dist: dist-local
|
||||
check: all check-local
|
||||
|
||||
all-local: $(TARGET) build-data $(MAN_FILES)
|
||||
|
||||
install-local: all-local install-man
|
||||
$(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
|
||||
$(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)/$(TARGET)
|
||||
|
||||
# man page
|
||||
install-man: $(MAN_FILES)
|
||||
$(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
|
||||
$(INSTALL_DATA) $< $(DESTDIR)$(mandir)/man$(SECTION)
|
||||
|
||||
%.$(SECTION): $(srcdir)/%.$(SECTION).in
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
# build postscript and pdf formats
|
||||
#$(TARGET).ps: $(TARGET).$(SECTION)
|
||||
# groff -man < $< > $@
|
||||
|
||||
#$(TARGET).pdf: $(TARGET).ps
|
||||
# ps2pdf $< $@
|
||||
|
||||
dist-local:
|
||||
|
||||
clean-local:
|
||||
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
|
||||
$(RMV) $(TARGET) $(OBJECTS)
|
||||
|
||||
distclean-local: clean-local
|
||||
$(RMV) Makefile
|
||||
|
||||
check-local: all-local
|
||||
|
||||
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
$(TARGET) : $(OBJECTS)
|
||||
$(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
|
||||
|
||||
ifeq (,$(MAKECMDGOALS))
|
||||
-include $(DEPS)
|
||||
else
|
||||
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
|
||||
-include $(DEPS)
|
||||
endif
|
||||
endif
|
||||
|
271
icu4c/source/tools/gensprep/filterRFC3454.pl
Executable file
271
icu4c/source/tools/gensprep/filterRFC3454.pl
Executable file
|
@ -0,0 +1,271 @@
|
|||
#!/usr/bin/perl
|
||||
# Copyright (c) 2001-2003 International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
|
||||
####################################################################################
|
||||
# filterRFC3454.pl:
|
||||
# This tool filters the RFC-3454 txt file for StringPrep tables and creates a table
|
||||
# to be used in NamePrepProfile
|
||||
#
|
||||
# Author: Ram Viswanadha
|
||||
#
|
||||
####################################################################################
|
||||
|
||||
use File::Find;
|
||||
use File::Basename;
|
||||
use IO::File;
|
||||
use Cwd;
|
||||
use File::Copy;
|
||||
use Getopt::Long;
|
||||
use File::Path;
|
||||
use File::Copy;
|
||||
|
||||
$copyright = "###################\n# Copyright (C) 2003, International Business Machines\n# Corporation and others. All Rights Reserved.\n###################\n\n";
|
||||
$warning = "###################\n# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT \n###################\n\n";
|
||||
#run the program
|
||||
main();
|
||||
|
||||
#---------------------------------------------------------------------
|
||||
# The main program
|
||||
|
||||
# Entry point: parses the command line, starts a fresh output file with the
# copyright and warning banners, then scans the source file for
# "Start Table" lines and hands each requested table to the matching
# create* routine. The variables used here are package globals on purpose:
# the create* routines assign to the same names.
sub main(){
    GetOptions(
           "--sourcedir=s" => \$sourceDir,
           "--destdir=s" => \$destDir,
           "--src-filename=s" => \$srcFileName,
           "--dest-filename=s" => \$destFileName,
           "--A1" => \$a1,
           "--B1" => \$b1,
           "--B2" => \$b2,
           "--C11" => \$c11,
           "--C12" => \$c12,
           "--C21" => \$c21,
           "--C22" => \$c22,
           "--C3" => \$c3,
           "--C4" => \$c4,
           "--C5" => \$c5,
           "--C6" => \$c6,
           "--C7" => \$c7,
           "--C8" => \$c8,
           "--C9" => \$c9,
           "--ldh-chars" => \$writeLDHChars,
           );

    # all four path components are mandatory
    foreach my $required ($sourceDir, $destDir, $srcFileName, $destFileName){
        usage() unless defined $required;
    }

    $infile = $sourceDir."/".$srcFileName;
    $inFH = IO::File->new($infile,"r")
            or die  "could not open the file $infile for reading: $! \n";
    $outfile = $destDir."/".$destFileName;

    # start from an empty file; the create* routines append to it later
    unlink($outfile);
    $outFH = IO::File->new($outfile,"a")
            or die  "could not open the file $outfile for writing: $! \n";
    print $outFH $copyright;
    print $outFH $warning;
    close($outFH);

    # prohibited-character tables: pattern for the "Start Table" line and
    # the command-line switch that enables it, in the order they are checked
    my @prohibitedTables = (
        [ qr/C.1.1/, \$c11 ],
        [ qr/C.1.2/, \$c12 ],
        [ qr/C.2.1/, \$c21 ],
        [ qr/C.2.2/, \$c22 ],
        [ qr/C.3/,   \$c3  ],
        [ qr/C.4/,   \$c4  ],
        [ qr/C.5/,   \$c5  ],
        [ qr/C.6/,   \$c6  ],
        [ qr/C.7/,   \$c7  ],
        [ qr/C.8/,   \$c8  ],
        [ qr/C.9/,   \$c9  ],
    );

    while(defined ($line=<$inFH>)){
        next unless $line=~ /Start\sTable/;

        # table A.1 is written regardless of the command-line switches
        createUnassignedTable($inFH,$outfile) if $line =~ /A.1/;
        createCaseMapNoNorm($inFH,$outfile)   if $line =~ /B.1/ && defined $b1;
        createCaseMap($inFH,$outfile)         if $line =~ /B.2/ && defined $b2;

        foreach my $entry (@prohibitedTables){
            my ($pattern, $enabled) = @$entry;
            if($line =~ $pattern && defined $$enabled){
                createProhibitedTable($inFH,$outfile,$line);
            }
        }
    }

    createLDHCharTable($inFH, $outfile) if defined $writeLDHChars;
    close($inFH);
}
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# Copies one table from the input handle to the output handle.
#
#   $inFH    - input handle; lines are read until one containing "End Table"
#   $outFH   - output handle
#   $comment - written (plus a newline) before the table
#   $table   - table id; selects how each line is rewritten:
#                matches /A/    -> "<code>; ; UNASSIGNED"
#                matches /B\.1/ -> "Map to nothing" becomes "MAP"
#                matches /B\.2/ -> "Case map" / "Additional folding" become "MAP"
#                matches /C/    -> "<code>; ; PROHIBITED"
#
# Page headings, form feeds and blank lines are skipped. A '-' range is
# rewritten as '..', leading whitespace is dropped and a ';' is appended to
# lines that have none. When "End Table" is reached the number of code
# points seen is written as a trailing comment.
sub readPrint{
    local ($inFH, $outFH,$comment, $table) = @_;
    $count = 0;
    print $outFH $comment."\n";

    while(defined ($line = <$inFH>)){
        # skip page furniture of the RFC text
        next if $line =~ /Hoffman\s\&\sBlanchet/; # ignore heading
        next if $line =~ /RFC\s3454/; # ignore heading
        next if $line =~ /\f/; # ignore form feed
        next if $line eq "\n"; # ignore blank lines

        # stop at "End Table"
        if( $line =~ /End\sTable/){
            print $outFH "\n# Total code points $count\n\n";
            return;
        }

        # debugging echo; active only if $print has been set to 1 elsewhere
        print $line if $print==1;

        # normalize the line: range separator, indentation, trailing ';'
        $line =~ s/-/../;
        $line =~ s/^\s+//;
        $line =~ s/$/;/ unless $line =~ /\;/;

        if($table =~ /A/ ){
            ($code, $noise) = split /;/ , $line;
            $line = $code."; ; UNASSIGNED\n";
        }elsif ( $table =~ /B\.1/ ){
            $line =~ s/Map to nothing/MAP/;
        }elsif ( $table =~ /B\.2/ ){
            $line =~ s/Case map/MAP/;
            $line =~ s/Additional folding/MAP/;
        }elsif ( $table =~ /C/ ) {
            ($code, $noise) = split /;/ , $line;
            $line = $code."; ; PROHIBITED\n";
        }

        # count the code points covered by this line
        if($line =~ /\.\./){
            ($code, $noise) = split /;/ , $line;
            ($startStr, $endStr ) = split /\.\./, $code;
            $start = atoi($startStr);
            $end   = atoi($endStr);
            #print $start." ".$end."\n";
            for(; $start <= $end; $start++){
                $count++;
            }
        }else{
            $count++;
        }

        print $outFH $line;
    }
}
|
||||
#-----------------------------------------------------------------------
|
||||
sub atoi {
    # Converts a string of hexadecimal digits (e.g. "00AD") to its numeric
    # value. The previous digit-by-digit arithmetic numified each character,
    # so the digits A-F counted as 0 and most code points came out wrong.
    my ($digits) = @_;
    return 0 unless defined $digits;
    $digits =~ s/\s+//g;    # tolerate surrounding blanks or a trailing newline
    return 0 if $digits eq '';
    return hex($digits);
}
|
||||
#-----------------------------------------------------------------------
|
||||
sub createUnassignedTable{
    # Appends the code points of RFC 3454 Table A.1 to the output file.
    # The arguments (input handle, output file name) are stored in the
    # script's package variables, as the rest of the script does.
    $inFH    = shift;
    $outfile = shift;
    $outFH   = IO::File->new($outfile,"a");
    unless($outFH){
        die "could not open the file $outfile for writing: $! \n";
    }
    $comment = "# This table contains code points from Table A.1 from RFC 3454\n";
    readPrint($inFH,$outFH, $comment, "A");
    close($outFH);
}
|
||||
#-----------------------------------------------------------------------
|
||||
sub createCaseMapNoNorm{
    # Appends the entries of RFC 3454 Table B.1 to the output file.
    # Arguments: input handle of the RFC text, name of the output file.
    $inFH    = shift;
    $outfile = shift;
    $outFH   = IO::File->new($outfile,"a");
    unless($outFH){
        die "could not open the file $outfile for writing: $! \n";
    }
    $comment = "# This table contains code points from Table B.1 from RFC 3454\n";
    readPrint($inFH,$outFH,$comment, "B.1");
    close($outFH);
}
|
||||
#-----------------------------------------------------------------------
|
||||
sub createCaseMap{
    # Appends the entries of RFC 3454 Table B.2 to the output file, with the
    # script-level $warning text placed in front of the table comment.
    # Arguments: input handle of the RFC text, name of the output file.
    $inFH    = shift;
    $outfile = shift;
    $outFH   = IO::File->new($outfile,"a");
    unless($outFH){
        die "could not open the file $outfile for writing: $! \n";
    }
    $comment = $warning."# This table contains code points from Table B.2 from RFC 3454\n";
    readPrint($inFH,$outFH,$comment, "B.2");
    close($outFH);
}
|
||||
#-----------------------------------------------------------------------
|
||||
sub createProhibitedTable{
    # Appends one prohibited-code-point table (one of the RFC 3454 C tables)
    # to the output file.
    # Arguments: input handle, output file name, and the table's start line
    # from the RFC text, which is turned into the header comment.
    $inFH    = shift;
    $outfile = shift;
    $line    = shift;

    # strip the "Start" keyword and every hyphen from the marker line
    $line =~ s/Start//;
    $line =~ s/-//g;
    $comment = "# code points from $line";

    $outFH = IO::File->new($outfile, "a");
    unless($outFH){
        die "could not open the file $outfile for writing: $! \n";
    }
    readPrint($inFH,$outFH,$comment, "C");
    close($outFH);
}
|
||||
#-----------------------------------------------------------------------
|
||||
sub createLDHCharTable{
    # Appends the four label-separator code points to the output file.
    # Nothing is read from the input handle here; the table is fixed.
    # The third argument is assigned to $line exactly as the original did
    # (it is undef when the caller passes only two arguments).
    $inFH    = shift;
    $outfile = shift;
    $line    = shift;
    $comment ="# code points for LDH chars \n";

    $outFH = IO::File->new($outfile, "a");
    unless($outFH){
        die "could not open the file $outfile for writing: $! \n";
    }

    print $outFH $comment;
    print $outFH "002E; ; LABEL_SEPARATOR\n";
    print $outFH "3002; ; LABEL_SEPARATOR\n";
    print $outFH "FF0E; ; LABEL_SEPARATOR\n";
    print $outFH "FF61; ; LABEL_SEPARATOR\n";
    print $outFH "\n# Total code points 4\n";

    close($outFH);
}
|
||||
#-----------------------------------------------------------------------
|
||||
sub usage {
    # Prints the command-line help text to standard output and then
    # terminates the script with exit status 0.
    my $helpText = <<"END";
Usage:
filterRFC3454.pl
Options:
--sourcedir=<directory>
--destdir=<directory>
--src-filename=<name of RFC file>
--dest-filename=<name of destination file>
--A1 Generate data for table A1
--B1 Generate data for table B1
--B2 Generate data for table B2
--C11 Generate data for table C11
--C12 Generate data for table C12
--C21 Generate data for table C21
--C22 Generate data for table C22
--C3 Generate data for table C3
--C4 Generate data for table C4
--C5 Generate data for table C5
--C6 Generate data for table C6
--C7 Generate data for table C7
--C8 Generate data for table C8
--C9 Generate data for table C9
--ldh-chars Generate data for LDH chars used in IDNA


e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --src-filename=rfc3454.txt --dest-filename=NamePrepProfile.txt --A1 --B2 --C12 --C21 --C22 --C3 --C4 --C5 --C6 --C7 --C8 --C9 --ldh-chars

filterRFC3454.pl filters the RFC file and creates String prep table files.
The RFC text can be downloaded from ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt

END
    print $helpText;
    exit(0);
}
|
||||
|
||||
|
102
icu4c/source/tools/gensprep/gensprep.8.in
Normal file
102
icu4c/source/tools/gensprep/gensprep.8.in
Normal file
|
@ -0,0 +1,102 @@
|
|||
.\" Hey, Emacs! This is -*-nroff-*- you know...
|
||||
.\"
|
||||
.\" gensprep.8: manual page for the gensprep utility
|
||||
.\"
|
||||
.\" Copyright (C) 2003 IBM, Inc. and others.
|
||||
.\"
|
||||
.TH gensprep 8 "18 March 2003" "ICU MANPAGE" "ICU @VERSION@ Manual"
|
||||
.SH NAME
|
||||
.B gensprep
|
||||
\- compile StringPrep data from files filtered by filterRFC3454.pl
|
||||
.SH SYNOPSIS
|
||||
.B gensprep
|
||||
[
|
||||
.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
|
||||
]
|
||||
[
|
||||
.BR "\-v\fP, \fB\-\-verbose"
|
||||
]
|
||||
[
|
||||
.BI "\-c\fP, \fB\-\-copyright"
|
||||
]
|
||||
[
|
||||
.BI "\-s\fP, \fB\-\-sourcedir" " source"
|
||||
]
|
||||
[
|
||||
.BI "\-d\fP, \fB\-\-destdir" " destination"
|
||||
]
|
||||
.SH DESCRIPTION
|
||||
.B gensprep
|
||||
reads filtered RFC 3454 files and compiles their
|
||||
information into a binary form.
|
||||
The resulting file,
|
||||
.BR <name>.icu ,
|
||||
can then be read directly by ICU, or used by
|
||||
.BR pkgdata (8)
|
||||
for incorporation into a larger archive or library.
|
||||
.LP
|
||||
The files read by
|
||||
.B gensprep
|
||||
are described in the
|
||||
.B FILES
|
||||
section.
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
|
||||
Print help about usage and exit.
|
||||
.TP
|
||||
.BR "\-v\fP, \fB\-\-verbose"
|
||||
Display extra informative messages during execution.
|
||||
.TP
|
||||
.BI "\-c\fP, \fB\-\-copyright"
|
||||
Include a copyright notice into the binary data.
|
||||
.TP
|
||||
.BI "\-s\fP, \fB\-\-sourcedir" " source"
|
||||
Set the source directory to
|
||||
.IR source .
|
||||
The default source directory is specified by the environment variable
|
||||
.BR ICU_DATA .
|
||||
.TP
|
||||
.BI "\-d\fP, \fB\-\-destdir" " destination"
|
||||
Set the destination directory to
|
||||
.IR destination .
|
||||
The default destination directory is specified by the environment variable
|
||||
.BR ICU_DATA .
|
||||
.SH ENVIRONMENT
|
||||
.TP 10
|
||||
.B ICU_DATA
|
||||
Specifies the directory containing ICU data. Defaults to
|
||||
.BR @thepkgicudatadir@/@PACKAGE@/@VERSION@/ .
|
||||
Some tools in ICU depend on the presence of the trailing slash. It is thus
|
||||
important to make sure that it is present if
|
||||
.B ICU_DATA
|
||||
is set.
|
||||
.SH FILES
|
||||
The following files are read by
|
||||
.B gensprep
|
||||
and are looked for in the
|
||||
.I source
|
||||
/misc for rfc3454_*.txt files and in
|
||||
.I source
|
||||
/unidata for NormalizationCorrections.txt.
|
||||
.TP 20
|
||||
.B rfc3454_A_1.txt
|
||||
Contains the list of unassigned codepoints in Unicode version 3.2.0.\|.\|..
|
||||
.TP
|
||||
.B rfc3454_B_1.txt
|
||||
Contains the list of code points that are commonly mapped to nothing.\|.\|..
|
||||
.TP
|
||||
.B rfc3454_B_2.txt
|
||||
Contains the list of mappings for casefolding of code points when Normalization form NFKC is specified.\|.\|..
|
||||
.TP
|
||||
.B rfc3454_C_X.txt
|
||||
Contains the list of code points that are prohibited for IDNA.
|
||||
.TP
|
||||
.B NormalizationCorrections.txt
|
||||
Contains the list of code points whose normalization has changed since Unicode Version 3.2.0.
|
||||
.SH VERSION
|
||||
@VERSION@
|
||||
.SH COPYRIGHT
|
||||
Copyright (C) 2000-2002 IBM, Inc. and others.
|
||||
.SH SEE ALSO
|
||||
.BR pkgdata (8)
|
425
icu4c/source/tools/gensprep/gensprep.c
Normal file
425
icu4c/source/tools/gensprep/gensprep.c
Normal file
|
@ -0,0 +1,425 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: gensprep.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003-02-06
|
||||
* created by: Ram Viswanadha
|
||||
*
|
||||
* This program reads the Profile.txt files,
|
||||
* parses them, and extracts the data for StringPrep profile.
|
||||
* It then preprocesses it and writes a binary file for efficient use
|
||||
* in various StringPrep conversion processes.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unewdata.h"
|
||||
#include "uoptions.h"
|
||||
#include "uparse.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "uprops.h"
|
||||
#include "sprpimpl.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
#include "gensprep.h"
|
||||
U_CDECL_END
|
||||
|
||||
#ifdef WIN32
|
||||
# pragma warning(disable: 4100)
|
||||
#endif
|
||||
|
||||
UBool beVerbose=FALSE, haveCopyright=TRUE;
|
||||
|
||||
#define NORM_CORRECTIONS_FILE_NAME "NormalizationCorrections.txt"
|
||||
|
||||
/* prototypes --------------------------------------------------------------- */
|
||||
|
||||
static void
|
||||
parseMappings(const char *filename, UBool reportError, UErrorCode *pErrorCode);
|
||||
|
||||
static void
|
||||
parseNormalizationCorrections(const char *filename, UErrorCode *pErrorCode);
|
||||
|
||||
|
||||
static void
|
||||
printMapping(UChar32 cp,UChar32* mapping, int32_t mappingLength);
|
||||
|
||||
static const char *UNIDATA_DIR = "unidata";
|
||||
static const char *MISC_DIR = "misc";
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
static UOption options[]={
|
||||
UOPTION_HELP_H,
|
||||
UOPTION_HELP_QUESTION_MARK,
|
||||
UOPTION_VERBOSE,
|
||||
UOPTION_COPYRIGHT,
|
||||
UOPTION_DESTDIR,
|
||||
UOPTION_SOURCEDIR,
|
||||
UOPTION_PACKAGE_NAME,
|
||||
UOPTION_BUNDLE_NAME,
|
||||
{ "normalization", NULL, NULL, NULL, 'n', UOPT_REQUIRES_ARG, 0 },
|
||||
{ "check-bidi", NULL, NULL, NULL, 'k', UOPT_NO_ARG, 0},
|
||||
{ "unicode", NULL, NULL, NULL, 'u', UOPT_REQUIRES_ARG, 0 },
|
||||
};
|
||||
|
||||
enum{
|
||||
HELP,
|
||||
HELP_QUESTION_MARK,
|
||||
VERBOSE,
|
||||
COPYRIGHT,
|
||||
DESTDIR,
|
||||
SOURCEDIR,
|
||||
PACKAGE_NAME,
|
||||
BUNDLE_NAME,
|
||||
NORMALIZE,
|
||||
CHECK_BIDI,
|
||||
UNICODE_VERSION
|
||||
};
|
||||
|
||||
static int printHelp(int argc, char* argv[]){
|
||||
/*
|
||||
* Broken into chucks because the C89 standard says the minimum
|
||||
* required supported string length is 509 bytes.
|
||||
*/
|
||||
fprintf(stderr,
|
||||
"Usage: %s [-options] [file_name]\n"
|
||||
"\n"
|
||||
"Read the files specified and\n"
|
||||
"create a binary file [package-name]_[bundle-name]." DATA_TYPE " with the StringPrep profile data\n"
|
||||
"\n",
|
||||
argv[0]);
|
||||
fprintf(stderr,
|
||||
"Options:\n"
|
||||
"\t-h or -? or --help print this usage text\n"
|
||||
"\t-v or --verbose verbose output\n"
|
||||
"\t-c or --copyright include a copyright notice\n");
|
||||
fprintf(stderr,
|
||||
"\t-d or --destdir destination directory, followed by the path\n"
|
||||
"\t-s or --sourcedir source directory of ICU data, followed by the path\n"
|
||||
"\t-b or --bundle-name generate the ouput data file with the name specified\n"
|
||||
"\t-p or --package-name prepend the output data file name with the package name specified\n"
|
||||
"\t-n or --normalize turn on the option for normalization and include mappings\n"
|
||||
"\t from NormalizationCorrections.txt from the given path,\n"
|
||||
"\t e.g: /test/icu/source/data/unidata\n"
|
||||
"\t-k or --check-bidi turn on the option for checking for BiDi in the profile\n"
|
||||
"\t-u or --unicode version of Unicode to be used with this profile followed by the version\n"
|
||||
);
|
||||
return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
|
||||
extern int
|
||||
main(int argc, char* argv[]) {
|
||||
#if !UCONFIG_NO_IDNA
|
||||
char* filename = NULL;
|
||||
#endif
|
||||
const char *srcDir=NULL, *destDir=NULL, *icuUniDataDir=NULL;
|
||||
const char *packageName=NULL, *bundleName=NULL, *inputFileName = NULL;
|
||||
char *basename=NULL;
|
||||
int32_t sprepOptions = 0;
|
||||
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
|
||||
U_MAIN_INIT_ARGS(argc, argv);
|
||||
|
||||
/* preset then read command line options */
|
||||
options[DESTDIR].value=u_getDataDirectory();
|
||||
options[SOURCEDIR].value="";
|
||||
options[UNICODE_VERSION].value="0"; /* don't assume the unicode version */
|
||||
options[BUNDLE_NAME].value = DATA_NAME;
|
||||
options[PACKAGE_NAME].value = U_ICUDATA_NAME;
|
||||
options[NORMALIZE].value = "";
|
||||
|
||||
argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
|
||||
|
||||
/* error handling, printing usage message */
|
||||
if(argc<0) {
|
||||
fprintf(stderr,
|
||||
"error in command line argument \"%s\"\n",
|
||||
argv[-argc]);
|
||||
}
|
||||
if(argc<0 || options[HELP].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
|
||||
return printHelp(argc, argv);
|
||||
|
||||
}
|
||||
|
||||
/* get the options values */
|
||||
beVerbose=options[VERBOSE].doesOccur;
|
||||
haveCopyright=options[COPYRIGHT].doesOccur;
|
||||
srcDir=options[SOURCEDIR].value;
|
||||
destDir=options[DESTDIR].value;
|
||||
packageName = options[PACKAGE_NAME].value;
|
||||
bundleName = options[BUNDLE_NAME].value;
|
||||
icuUniDataDir = options[NORMALIZE].value;
|
||||
|
||||
if(argc<2) {
|
||||
/* print the help message */
|
||||
return printHelp(argc, argv);
|
||||
} else {
|
||||
inputFileName = argv[1];
|
||||
}
|
||||
if(!options[UNICODE_VERSION].doesOccur){
|
||||
return printHelp(argc, argv);
|
||||
}
|
||||
#if UCONFIG_NO_IDNA
|
||||
|
||||
fprintf(stderr,
|
||||
"gensprep writes dummy " U_ICUDATA_NAME "_" _SPREP_DATA_NAME "." DATA_TYPE
|
||||
" because UCONFIG_NO_IDNA is set, \n"
|
||||
"see icu/source/common/unicode/uconfig.h\n");
|
||||
generateData(destDir);
|
||||
|
||||
#else
|
||||
|
||||
setUnicodeVersion(options[UNICODE_VERSION].value);
|
||||
filename = (char* ) uprv_malloc(uprv_strlen(srcDir) + 300); /* hopefully this should be enough */
|
||||
|
||||
/* prepare the filename beginning with the source dir */
|
||||
if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){
|
||||
filename[0] = 0x2E;
|
||||
filename[1] = U_FILE_SEP_CHAR;
|
||||
uprv_strcpy(filename+2,srcDir);
|
||||
}else{
|
||||
uprv_strcpy(filename, srcDir);
|
||||
}
|
||||
|
||||
basename=filename+uprv_strlen(filename);
|
||||
if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
|
||||
*basename++=U_FILE_SEP_CHAR;
|
||||
}
|
||||
|
||||
/* initialize */
|
||||
init();
|
||||
|
||||
/* process the file */
|
||||
uprv_strcpy(basename,inputFileName);
|
||||
parseMappings(filename,FALSE, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "Could not open file %s for reading. Error: %s \n", filename, u_errorName(errorCode));
|
||||
return errorCode;
|
||||
}
|
||||
|
||||
if(options[NORMALIZE].doesOccur){
|
||||
/* set up directory for NormalizationCorrections.txt */
|
||||
uprv_strcpy(filename,icuUniDataDir);
|
||||
basename=filename+uprv_strlen(filename);
|
||||
if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
|
||||
*basename++=U_FILE_SEP_CHAR;
|
||||
}
|
||||
|
||||
*basename++=U_FILE_SEP_CHAR;
|
||||
uprv_strcpy(basename,NORM_CORRECTIONS_FILE_NAME);
|
||||
|
||||
parseNormalizationCorrections(filename,&errorCode);
|
||||
if(U_FAILURE(errorCode)){
|
||||
fprintf(stderr,"Could not open file %s for reading \n", filename);
|
||||
return errorCode;
|
||||
}
|
||||
sprepOptions |= _SPREP_NORMALIZATION_ON;
|
||||
}
|
||||
|
||||
if(options[CHECK_BIDI].doesOccur){
|
||||
sprepOptions |= _SPREP_CHECK_BIDI_ON;
|
||||
}
|
||||
|
||||
setOptions(sprepOptions);
|
||||
|
||||
/* process parsed data */
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
/* write the data file */
|
||||
generateData(destDir, packageName, bundleName);
|
||||
|
||||
cleanUpData();
|
||||
}
|
||||
|
||||
uprv_free(filename);
|
||||
|
||||
#endif
|
||||
|
||||
return errorCode;
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
static void U_CALLCONV
|
||||
normalizationCorrectionsLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
uint32_t mapping[40];
|
||||
char *end, *s;
|
||||
uint32_t code;
|
||||
int32_t length;
|
||||
UVersionInfo version;
|
||||
UVersionInfo thisVersion;
|
||||
|
||||
/* get the character code, field 0 */
|
||||
code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "gensprep: error parsing NormalizationCorrections.txt mapping at %s\n", fields[0][0]);
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
/* Original (erroneous) decomposition */
|
||||
s = fields[1][0];
|
||||
|
||||
/* parse the mapping string */
|
||||
length=u_parseCodePoints(s, mapping, sizeof(mapping)/4, pErrorCode);
|
||||
|
||||
/* ignore corrected decomposition */
|
||||
|
||||
u_versionFromString(version,fields[3][0] );
|
||||
u_versionFromString(thisVersion, "3.2.0");
|
||||
|
||||
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "gensprep error parsing NormalizationCorrections.txt of U+%04lx - %s\n",
|
||||
(long)code, u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
|
||||
/* store the mapping */
|
||||
if( version[0] > thisVersion[0] ||
|
||||
((version[0]==thisVersion[0]) && (version[1] > thisVersion[1]))
|
||||
){
|
||||
storeMapping(code,mapping, length, USPREP_MAP, pErrorCode);
|
||||
}
|
||||
setUnicodeVersionNC(version);
|
||||
}
|
||||
|
||||
static void
|
||||
parseNormalizationCorrections(const char *filename, UErrorCode *pErrorCode) {
|
||||
char *fields[4][2];
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
u_parseDelimitedFile(filename, ';', fields, 4, normalizationCorrectionsLineFn, NULL, pErrorCode);
|
||||
|
||||
/* fprintf(stdout,"Number of code points that have NormalizationCorrections mapping with length >1 : %i\n",len); */
|
||||
|
||||
if(U_FAILURE(*pErrorCode) && ( *pErrorCode!=U_FILE_ACCESS_ERROR)) {
|
||||
fprintf(stderr, "gensprep error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
}
|
||||
|
||||
static void U_CALLCONV
|
||||
strprepProfileLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
uint32_t mapping[40];
|
||||
char *end, *map;
|
||||
uint32_t code;
|
||||
int32_t length;
|
||||
/*UBool* mapWithNorm = (UBool*) context;*/
|
||||
const char* typeName;
|
||||
uint32_t rangeStart=0,rangeEnd =0;
|
||||
const char* filename = (const char*) context;
|
||||
|
||||
typeName = fields[2][0];
|
||||
map = fields[1][0];
|
||||
|
||||
if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){
|
||||
|
||||
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)){
|
||||
fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
|
||||
return;
|
||||
}
|
||||
|
||||
/* store the range */
|
||||
storeRange(rangeStart,rangeEnd,USPREP_UNASSIGNED, pErrorCode);
|
||||
|
||||
}else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){
|
||||
|
||||
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)){
|
||||
fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
|
||||
return;
|
||||
}
|
||||
|
||||
/* store the range */
|
||||
storeRange(rangeStart,rangeEnd,USPREP_PROHIBITED, pErrorCode);
|
||||
|
||||
}else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){
|
||||
|
||||
/* get the character code, field 0 */
|
||||
code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
|
||||
if(end<=fields[0][0] || end!=fields[0][1]) {
|
||||
fprintf(stderr, "gensprep: syntax error in field 0 at %s\n", fields[0][0]);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
/* parse the mapping string */
|
||||
length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode);
|
||||
|
||||
/* store the mapping */
|
||||
storeMapping(code,mapping, length,USPREP_MAP, pErrorCode);
|
||||
|
||||
}else if(uprv_strstr(typeName, usprepTypeNames[USPREP_LABEL_SEPARATOR])!=NULL){
|
||||
|
||||
u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)){
|
||||
fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
|
||||
return;
|
||||
}
|
||||
/* store the range */
|
||||
storeRange(rangeStart,rangeEnd,USPREP_LABEL_SEPARATOR, pErrorCode);
|
||||
}else{
|
||||
*pErrorCode = U_INVALID_FORMAT_ERROR;
|
||||
}
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "gensprep error parsing %s line %s at %s\n",filename,
|
||||
fields[0][0],fields[2][0],u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
parseMappings(const char *filename, UBool reportError, UErrorCode *pErrorCode) {
|
||||
char *fields[3][2];
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode);
|
||||
|
||||
/*fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len);*/
|
||||
|
||||
if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) {
|
||||
fprintf(stderr, "gensprep error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_IDNA */
|
||||
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
206
icu4c/source/tools/gensprep/gensprep.dsp
Normal file
206
icu4c/source/tools/gensprep/gensprep.dsp
Normal file
|
@ -0,0 +1,206 @@
|
|||
# Microsoft Developer Studio Project File - Name="gensprep" - Package Owner=<4>
|
||||
# Microsoft Developer Studio Generated Build File, Format Version 6.00
|
||||
# ** DO NOT EDIT **
|
||||
|
||||
# TARGTYPE "Win32 (x86) Console Application" 0x0103
|
||||
|
||||
CFG=gensprep - Win32 Debug
|
||||
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
|
||||
!MESSAGE use the Export Makefile command and run
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "gensprep.mak".
|
||||
!MESSAGE
|
||||
!MESSAGE You can specify a configuration when running NMAKE
|
||||
!MESSAGE by defining the macro CFG on the command line. For example:
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "gensprep.mak" CFG="gensprep - Win32 Debug"
|
||||
!MESSAGE
|
||||
!MESSAGE Possible choices for configuration are:
|
||||
!MESSAGE
|
||||
!MESSAGE "gensprep - Win32 Release" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE "gensprep - Win32 Debug" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE "gensprep - Win64 Release" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE "gensprep - Win64 Debug" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE
|
||||
|
||||
# Begin Project
|
||||
# PROP AllowPerConfigDependencies 0
|
||||
# PROP Scc_ProjName ""
|
||||
# PROP Scc_LocalPath ""
|
||||
CPP=cl.exe
|
||||
RSC=rc.exe
|
||||
|
||||
!IF "$(CFG)" == "gensprep - Win32 Release"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 0
|
||||
# PROP BASE Output_Dir "Release"
|
||||
# PROP BASE Intermediate_Dir "Release"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 0
|
||||
# PROP Output_Dir "Release"
|
||||
# PROP Intermediate_Dir "Release"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
MTL=midl.exe
|
||||
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
|
||||
# ADD CPP /nologo /W3 /GX /O2 /I "..\..\common" /I "..\toolutil" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
|
||||
# ADD BASE RSC /l 0x409 /d "NDEBUG"
|
||||
# ADD RSC /l 0x409 /d "NDEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
|
||||
# ADD LINK32 icuuc.lib icutu.lib /nologo /subsystem:console /machine:I386 /libpath:"..\..\..\lib"
|
||||
# Begin Custom Build
|
||||
TargetPath=.\Release\gensprep.exe
|
||||
InputPath=.\Release\gensprep.exe
|
||||
InputName=gensprep
|
||||
SOURCE="$(InputPath)"
|
||||
|
||||
"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(TargetPath) ..\..\..\bin
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "gensprep - Win32 Debug"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 1
|
||||
# PROP BASE Output_Dir "Debug"
|
||||
# PROP BASE Intermediate_Dir "Debug"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 1
|
||||
# PROP Output_Dir "Debug"
|
||||
# PROP Intermediate_Dir "Debug"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
MTL=midl.exe
|
||||
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
|
||||
# ADD CPP /nologo /W3 /Gm /GX /ZI /Od /I "..\..\common" /I "..\toolutil" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
|
||||
# ADD BASE RSC /l 0x409 /d "_DEBUG"
|
||||
# ADD RSC /l 0x409 /d "_DEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
|
||||
# ADD LINK32 icutud.lib icuucd.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\..\..\lib"
|
||||
# Begin Custom Build
|
||||
TargetPath=.\Debug\gensprep.exe
|
||||
InputPath=.\Debug\gensprep.exe
|
||||
InputName=gensprep
|
||||
SOURCE="$(InputPath)"
|
||||
|
||||
"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(TargetPath) ..\..\..\bin
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "gensprep - Win64 Release"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 0
|
||||
# PROP BASE Output_Dir "Release"
|
||||
# PROP BASE Intermediate_Dir "Release"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 0
|
||||
# PROP Output_Dir "Release"
|
||||
# PROP Intermediate_Dir "Release"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
MTL=midl.exe
|
||||
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
|
||||
# ADD CPP /nologo /W3 /GX /Zi /O2 /Op /I "..\..\common" /I "..\toolutil" /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /QIA64_fmaopt /Zm600 /c
|
||||
# ADD BASE RSC /l 0x409 /d "NDEBUG"
|
||||
# ADD RSC /l 0x409 /d "NDEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:IX86 /machine:IA64
|
||||
# ADD LINK32 icuuc.lib icutu.lib /nologo /subsystem:console /machine:IX86 /libpath:"..\..\..\lib" /machine:IA64
|
||||
# Begin Custom Build
|
||||
TargetPath=.\Release\gensprep.exe
|
||||
InputPath=.\Release\gensprep.exe
|
||||
InputName=gensprep
|
||||
SOURCE="$(InputPath)"
|
||||
|
||||
"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(TargetPath) ..\..\..\bin
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "gensprep - Win64 Debug"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 1
|
||||
# PROP BASE Output_Dir "Debug"
|
||||
# PROP BASE Intermediate_Dir "Debug"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 1
|
||||
# PROP Output_Dir "Debug"
|
||||
# PROP Intermediate_Dir "Debug"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
MTL=midl.exe
|
||||
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
|
||||
# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /Op /I "..\..\common" /I "..\toolutil" /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /GZ /QIA64_fmaopt /Zm600 /c
|
||||
# ADD BASE RSC /l 0x409 /d "_DEBUG"
|
||||
# ADD RSC /l 0x409 /d "_DEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:IX86 /pdbtype:sept /machine:IA64
|
||||
# ADD LINK32 icutud.lib icuucd.lib /nologo /subsystem:console /incremental:no /debug /machine:IX86 /pdbtype:sept /libpath:"..\..\..\lib" /machine:IA64
|
||||
# Begin Custom Build
|
||||
TargetPath=.\Debug\gensprep.exe
|
||||
InputPath=.\Debug\gensprep.exe
|
||||
InputName=gensprep
|
||||
SOURCE="$(InputPath)"
|
||||
|
||||
"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(TargetPath) ..\..\..\bin
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ENDIF
|
||||
|
||||
# Begin Target
|
||||
|
||||
# Name "gensprep - Win32 Release"
|
||||
# Name "gensprep - Win32 Debug"
|
||||
# Name "gensprep - Win64 Release"
|
||||
# Name "gensprep - Win64 Debug"
|
||||
# Begin Group "Source Files"
|
||||
|
||||
# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\gensprep.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\store.c
|
||||
# End Source File
|
||||
# End Group
|
||||
# Begin Group "Header Files"
|
||||
|
||||
# PROP Default_Filter "h;hpp;hxx;hm;inl"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\gensprep.h
|
||||
# End Source File
|
||||
# End Group
|
||||
# Begin Group "Resource Files"
|
||||
|
||||
# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
|
||||
# End Group
|
||||
# End Target
|
||||
# End Project
|
82
icu4c/source/tools/gensprep/gensprep.h
Normal file
82
icu4c/source/tools/gensprep/gensprep.h
Normal file
|
@ -0,0 +1,82 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: gensprep.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003-02-06
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
|
||||
#ifndef __GENIDN_H__
|
||||
#define __GENIDN_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "sprpimpl.h"
|
||||
|
||||
/* file definitions */
|
||||
#define DATA_NAME "sprep"
|
||||
#define DATA_TYPE "spp"
|
||||
|
||||
/*
|
||||
* data structure that holds the IDN properties for one or more
|
||||
* code point(s) at build time
|
||||
*/
|
||||
|
||||
|
||||
/* global flags */
|
||||
extern UBool beVerbose, haveCopyright;
|
||||
|
||||
/* prototypes */
|
||||
|
||||
extern void
|
||||
setUnicodeVersion(const char *v);
|
||||
|
||||
extern void
|
||||
setUnicodeVersionNC(UVersionInfo version);
|
||||
|
||||
extern void
|
||||
init(void);
|
||||
|
||||
extern void
|
||||
storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length, UStringPrepType type, UErrorCode* status);
|
||||
extern void
|
||||
storeRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status);
|
||||
|
||||
extern void
|
||||
generateData(const char *dataDir, const char* packageName, const char* bundleName);
|
||||
|
||||
extern void
|
||||
setOptions(int32_t options);
|
||||
|
||||
extern void
|
||||
cleanUpData(void);
|
||||
|
||||
/*
|
||||
extern void
|
||||
storeIDN(uint32_t code, IDN *idn);
|
||||
|
||||
extern void
|
||||
processData(void);
|
||||
|
||||
|
||||
*/
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
||||
|
||||
|
153
icu4c/source/tools/gensprep/gensprep.vcproj
Normal file
153
icu4c/source/tools/gensprep/gensprep.vcproj
Normal file
|
@ -0,0 +1,153 @@
|
|||
<?xml version="1.0" encoding = "Windows-1252"?>
|
||||
<VisualStudioProject
|
||||
ProjectType="Visual C++"
|
||||
Version="7.00"
|
||||
Name="gensprep"
|
||||
SccProjectName=""
|
||||
SccLocalPath="">
|
||||
<Platforms>
|
||||
<Platform
|
||||
Name="Win32"/>
|
||||
</Platforms>
|
||||
<Configurations>
|
||||
<Configuration
|
||||
Name="Release|Win32"
|
||||
OutputDirectory=".\Release"
|
||||
IntermediateDirectory=".\Release"
|
||||
ConfigurationType="1"
|
||||
UseOfMFC="0"
|
||||
ATLMinimizesCRunTimeLibraryUsage="FALSE"
|
||||
CharacterSet="2">
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
InlineFunctionExpansion="2"
|
||||
ImproveFloatingPointConsistency="TRUE"
|
||||
AdditionalIncludeDirectories="..\..\common,..\toolutil"
|
||||
PreprocessorDefinitions="WIN32,NDEBUG,_CONSOLE"
|
||||
StringPooling="TRUE"
|
||||
RuntimeLibrary="4"
|
||||
EnableFunctionLevelLinking="TRUE"
|
||||
PrecompiledHeaderFile=".\Release/gensprep.pch"
|
||||
AssemblerListingLocation=".\Release/"
|
||||
ObjectFile=".\Release/"
|
||||
ProgramDataBaseFileName=".\Release/"
|
||||
WarningLevel="3"
|
||||
SuppressStartupBanner="TRUE"
|
||||
CompileAs="0"/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="copy $(TargetPath) ..\..\..\bin
|
||||
"
|
||||
Outputs="..\..\..\bin\$(InputName).exe"/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
AdditionalOptions="/MACHINE:I386"
|
||||
AdditionalDependencies="icuuc.lib icutu.lib"
|
||||
OutputFile=".\Release/gensprep.exe"
|
||||
LinkIncremental="1"
|
||||
SuppressStartupBanner="TRUE"
|
||||
AdditionalLibraryDirectories="..\..\..\lib"
|
||||
ProgramDatabaseFile=".\Release/gensprep.pdb"
|
||||
SubSystem="1"/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
TypeLibraryName=".\Release/gensprep.tlb"/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"/>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
PreprocessorDefinitions="NDEBUG"
|
||||
Culture="1033"/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Debug|Win32"
|
||||
OutputDirectory=".\Debug"
|
||||
IntermediateDirectory=".\Debug"
|
||||
ConfigurationType="1"
|
||||
UseOfMFC="0"
|
||||
ATLMinimizesCRunTimeLibraryUsage="FALSE"
|
||||
CharacterSet="2">
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
AdditionalIncludeDirectories="..\..\common,..\toolutil"
|
||||
PreprocessorDefinitions="WIN32,_DEBUG,_CONSOLE"
|
||||
BasicRuntimeChecks="3"
|
||||
RuntimeLibrary="5"
|
||||
PrecompiledHeaderFile=".\Debug/gensprep.pch"
|
||||
AssemblerListingLocation=".\Debug/"
|
||||
ObjectFile=".\Debug/"
|
||||
ProgramDataBaseFileName=".\Debug/"
|
||||
WarningLevel="3"
|
||||
SuppressStartupBanner="TRUE"
|
||||
DebugInformationFormat="4"
|
||||
CompileAs="0"/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="copy $(TargetPath) ..\..\..\bin
|
||||
"
|
||||
Outputs="..\..\..\bin\$(InputName).exe"/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
AdditionalOptions="/MACHINE:I386"
|
||||
AdditionalDependencies="icutud.lib icuucd.lib"
|
||||
OutputFile=".\Debug/gensprep.exe"
|
||||
LinkIncremental="2"
|
||||
SuppressStartupBanner="TRUE"
|
||||
AdditionalLibraryDirectories="..\..\..\lib"
|
||||
GenerateDebugInformation="TRUE"
|
||||
ProgramDatabaseFile=".\Debug/gensprep.pdb"
|
||||
SubSystem="1"/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
TypeLibraryName=".\Debug/gensprep.tlb"/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"/>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
PreprocessorDefinitions="_DEBUG"
|
||||
Culture="1033"/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"/>
|
||||
</Configuration>
|
||||
</Configurations>
|
||||
<Files>
|
||||
<Filter
|
||||
Name="Source Files"
|
||||
Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat">
|
||||
<File
|
||||
RelativePath=".\gensprep.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\store.c">
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Header Files"
|
||||
Filter="h;hpp;hxx;hm;inl">
|
||||
<File
|
||||
RelativePath=".\gensprep.h">
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Resource Files"
|
||||
Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe">
|
||||
</Filter>
|
||||
</Files>
|
||||
<Globals>
|
||||
</Globals>
|
||||
</VisualStudioProject>
|
608
icu4c/source/tools/gensprep/store.c
Normal file
608
icu4c/source/tools/gensprep/store.c
Normal file
|
@ -0,0 +1,608 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: store.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003-02-06
|
||||
* created by: Ram Viswanadha
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "filestrm.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "utrie.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "unewdata.h"
|
||||
#include "gensprep.h"
|
||||
#include "uhash.h"
|
||||
|
||||
|
||||
#ifdef WIN32
|
||||
# pragma warning(disable: 4100)
|
||||
#endif
|
||||
|
||||
#define DO_DEBUG_OUT 0
|
||||
|
||||
|
||||
/**
|
||||
|
||||
This is a simple Trie with the following structure
|
||||
|
||||
16-bit USPREP sets:
|
||||
if(trieWord >= 0xFFF0){
|
||||
|
||||
UStringPrepType enum = value - 0xFFF0;
|
||||
|
||||
}else{
|
||||
|
||||
Bit
|
||||
0 ON: USPREP_PROHIBITED
|
||||
1 OFF: the next 14 bits contain the delta
|
||||
ON: the next 14 bits contain the index into the mapping array
|
||||
2..15 Contain the index into the mapping array or delta
|
||||
|
||||
}
|
||||
|
||||
|
||||
*/
|
||||
|
||||
/* file data ---------------------------------------------------------------- */
|
||||
/* indexes[] value names */
|
||||
|
||||
#if UCONFIG_NO_IDNA
|
||||
|
||||
/* dummy UDataInfo cf. udata.h */
|
||||
static UDataInfo dataInfo = {
|
||||
sizeof(UDataInfo),
|
||||
0,
|
||||
|
||||
U_IS_BIG_ENDIAN,
|
||||
U_CHARSET_FAMILY,
|
||||
U_SIZEOF_UCHAR,
|
||||
0,
|
||||
|
||||
{ 0, 0, 0, 0 }, /* dummy dataFormat */
|
||||
{ 0, 0, 0, 0 }, /* dummy formatVersion */
|
||||
{ 0, 0, 0, 0 } /* dummy dataVersion */
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
/* Index words written at the start of the data file; slots are named by the _SPREP_* enums. */
static int32_t indexes[_SPREP_INDEX_TOP] = { 0 };

/* UTF-16 mapping strings; allocated and filled by storeMappingData(). */
static uint16_t *mappingData = NULL;

/* Number of 16-bit units mappingData must hold; accumulated by storeMapping(). */
static int32_t mappingDataCapacity = 0;

/* Next free position in mappingData. */
static int16_t currentIndex = 0;

/* Length, in UTF-16 code units, of the longest mapping string stored so far. */
static int32_t maxLength = 0;
|
||||
|
||||
|
||||
/* UDataInfo cf. udata.h */
|
||||
static UDataInfo dataInfo={
|
||||
sizeof(UDataInfo),
|
||||
0,
|
||||
|
||||
U_IS_BIG_ENDIAN,
|
||||
U_CHARSET_FAMILY,
|
||||
U_SIZEOF_UCHAR,
|
||||
0,
|
||||
|
||||
{ 0x53, 0x50, 0x52, 0x50 }, /* dataFormat="SPRP" */
|
||||
{ 3, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */
|
||||
{ 3, 2, 0, 0 } /* dataVersion (Unicode version) */
|
||||
};
|
||||
void
|
||||
setUnicodeVersion(const char *v) {
|
||||
UVersionInfo version;
|
||||
u_versionFromString(version, v);
|
||||
uprv_memcpy(dataInfo.dataVersion, version, 4);
|
||||
}
|
||||
|
||||
void
|
||||
setUnicodeVersionNC(UVersionInfo version){
|
||||
uint32_t univer = version[0] << 24;
|
||||
univer += version[1] << 16;
|
||||
univer += version[2] << 8;
|
||||
univer += version[3];
|
||||
indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION] = univer;
|
||||
}
|
||||
/* Build-time trie holding one value per code point; allocated by init(), freed by cleanUpData(). */
static UNewTrie *sprepTrie;
|
||||
|
||||
/* Maximum data length handed to utrie_open() for the build-time trie. */
#define MAX_DATA_LENGTH 11500

/*
 * Inclusive bounds of a code point delta that storeMapping() stores
 * directly in a trie word. The negative value is parenthesized so the
 * macro expands safely inside any expression.
 */
#define SPREP_DELTA_RANGE_POSITIVE_LIMIT 8191
#define SPREP_DELTA_RANGE_NEGATIVE_LIMIT (-8192)
|
||||
|
||||
|
||||
extern void
|
||||
init() {
|
||||
|
||||
sprepTrie = (UNewTrie *)uprv_malloc(sizeof(UNewTrie));
|
||||
uprv_memset(sprepTrie, 0, sizeof(UNewTrie));
|
||||
|
||||
/* initialize the two tries */
|
||||
if(NULL==utrie_open(sprepTrie, NULL, MAX_DATA_LENGTH, 0, 0, FALSE)) {
|
||||
fprintf(stderr, "error: failed to initialize tries\n");
|
||||
exit(U_MEMORY_ALLOCATION_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
/* Code point (integer key) -> ValueStruct*; opened lazily by storeMapping(), closed by generateData(). */
static UHashtable* hashTable = NULL;
|
||||
|
||||
|
||||
struct ValueStruct {
|
||||
UChar* mapping;
|
||||
int16_t length;
|
||||
UStringPrepType type;
|
||||
};
|
||||
|
||||
typedef struct ValueStruct ValueStruct;
|
||||
|
||||
/* Callback for deleting the value from the hashtable */
|
||||
void U_CALLCONV valueDeleter(void* obj){
|
||||
ValueStruct* value = (ValueStruct*) obj;
|
||||
uprv_free(value->mapping);
|
||||
uprv_free(value);
|
||||
}
|
||||
|
||||
/* Callback for hashing the entry */
|
||||
static int32_t U_CALLCONV hashEntry(const UHashTok parm) {
|
||||
return parm.integer;
|
||||
}
|
||||
|
||||
/* Callback for comparing two entries */
|
||||
static UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) {
|
||||
return (UBool)(p1.integer != p2.integer);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
storeMappingData(){
|
||||
|
||||
int32_t pos = -1;
|
||||
const UHashElement* element = NULL;
|
||||
ValueStruct* value = NULL;
|
||||
int32_t codepoint = 0;
|
||||
int32_t elementCount = uhash_count(hashTable);
|
||||
int32_t writtenElementCount = 0;
|
||||
int32_t mappingLength = 1; /* minimum mapping length */
|
||||
int32_t oldMappingLength = 0;
|
||||
uint16_t trieWord =0;
|
||||
int32_t limitIndex = 0;
|
||||
|
||||
/*initialize the mapping data */
|
||||
mappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * (mappingDataCapacity));
|
||||
|
||||
uprv_memset(mappingData,0,U_SIZEOF_UCHAR * mappingDataCapacity);
|
||||
|
||||
while(writtenElementCount < elementCount){
|
||||
|
||||
while( (element = uhash_nextElement(hashTable, &pos))!=NULL){
|
||||
|
||||
codepoint = element->key.integer;
|
||||
value = (ValueStruct*)element->value.pointer;
|
||||
|
||||
/* store the start of indexes */
|
||||
if(oldMappingLength != mappingLength){
|
||||
/* Assume that index[] is used according to the enums defined */
|
||||
if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){
|
||||
indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex;
|
||||
}
|
||||
if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH &&
|
||||
mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){
|
||||
|
||||
limitIndex = currentIndex;
|
||||
|
||||
}
|
||||
oldMappingLength = mappingLength;
|
||||
}
|
||||
|
||||
if(value->length == mappingLength){
|
||||
uint32_t savedTrieWord = 0;
|
||||
trieWord = currentIndex << 2;
|
||||
/* turn on the 2nd bit to signal that the following bits contain an index */
|
||||
trieWord += 0x02;
|
||||
|
||||
if(trieWord > _SPREP_TYPE_THRESHOLD){
|
||||
fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
|
||||
exit(U_ILLEGAL_CHAR_FOUND);
|
||||
}
|
||||
/* figure out if the code point has type already stored */
|
||||
savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
|
||||
if(savedTrieWord!=0){
|
||||
if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
|
||||
/* turn on the first bit in trie word */
|
||||
trieWord += 0x01;
|
||||
}else{
|
||||
/*
|
||||
* the codepoint has value something other than prohibited
|
||||
* and a mapping .. error!
|
||||
*/
|
||||
fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", codepoint);
|
||||
exit(U_ILLEGAL_ARGUMENT_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
/* now set the value in the trie */
|
||||
if(!utrie_set32(sprepTrie,codepoint,trieWord)){
|
||||
fprintf(stderr,"Could not set the value for code point.\n");
|
||||
exit(U_ILLEGAL_ARGUMENT_ERROR);
|
||||
}
|
||||
|
||||
/* written the trie word for the codepoint... increment the count*/
|
||||
writtenElementCount++;
|
||||
|
||||
/* sanity check are we exceeding the max number allowed */
|
||||
if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){
|
||||
fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n", currentIndex+value->length, _SPREP_MAX_INDEX_VALUE);
|
||||
exit(U_INDEX_OUTOFBOUNDS_ERROR);
|
||||
}
|
||||
|
||||
/* copy the mapping data */
|
||||
if(currentIndex+value->length+1 <= mappingDataCapacity){
|
||||
/* write the length */
|
||||
if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
|
||||
/* the cast here is safe since we donot expect the length to be > 65535 */
|
||||
mappingData[currentIndex++] = (uint16_t) mappingLength;
|
||||
}
|
||||
/* copy the contents to mappindData array */
|
||||
uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
|
||||
currentIndex += value->length;
|
||||
|
||||
}else{
|
||||
/* realloc */
|
||||
UChar* newMappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * mappingDataCapacity*2);
|
||||
if(newMappingData == NULL){
|
||||
fprintf(stderr, "Could not realloc the mapping data!\n");
|
||||
exit(U_MEMORY_ALLOCATION_ERROR);
|
||||
}
|
||||
uprv_memmove(newMappingData, mappingData, U_SIZEOF_UCHAR * mappingDataCapacity);
|
||||
mappingDataCapacity *= 2;
|
||||
uprv_free(mappingData);
|
||||
mappingData = newMappingData;
|
||||
/* write the length */
|
||||
if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
|
||||
/* the cast here is safe since we donot expect the length to be > 65535 */
|
||||
mappingData[currentIndex++] = (uint16_t) mappingLength;
|
||||
}
|
||||
/* continue copying */
|
||||
uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
|
||||
currentIndex += value->length;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
mappingLength++;
|
||||
pos = -1;
|
||||
}
|
||||
/* set the last length for range check */
|
||||
if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){
|
||||
indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1;
|
||||
}else{
|
||||
indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
extern void setOptions(int32_t options){
|
||||
indexes[_SPREP_OPTIONS] = options;
|
||||
}
|
||||
extern void
|
||||
storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length,
|
||||
UStringPrepType type, UErrorCode* status){
|
||||
|
||||
|
||||
UChar* map = NULL;
|
||||
int16_t adjustedLen=0, i;
|
||||
uint16_t trieWord = 0;
|
||||
ValueStruct *value = NULL;
|
||||
uint32_t savedTrieWord = 0;
|
||||
|
||||
/* initialize the hashtable */
|
||||
if(hashTable==NULL){
|
||||
hashTable = uhash_open(hashEntry, compareEntries, status);
|
||||
uhash_setValueDeleter(hashTable, valueDeleter);
|
||||
}
|
||||
|
||||
/* figure out if the code point has type already stored */
|
||||
savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
|
||||
if(savedTrieWord!=0){
|
||||
if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
|
||||
/* turn on the first bit in trie word */
|
||||
trieWord += 0x01;
|
||||
}else{
|
||||
/*
|
||||
* the codepoint has value something other than prohibited
|
||||
* and a mapping .. error!
|
||||
*/
|
||||
fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", codepoint);
|
||||
exit(U_ILLEGAL_ARGUMENT_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
/* figure out the real length */
|
||||
for(i=0; i<length; i++){
|
||||
if(mapping[i] > 0xFFFF){
|
||||
adjustedLen +=2;
|
||||
}else{
|
||||
adjustedLen++;
|
||||
}
|
||||
}
|
||||
|
||||
if(adjustedLen == 0){
|
||||
trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2);
|
||||
/* make sure that the value of trieWord is less than the threshold */
|
||||
if(trieWord < _SPREP_TYPE_THRESHOLD){
|
||||
/* now set the value in the trie */
|
||||
if(!utrie_set32(sprepTrie,codepoint,trieWord)){
|
||||
fprintf(stderr,"Could not set the value for code point.\n");
|
||||
exit(U_ILLEGAL_ARGUMENT_ERROR);
|
||||
}
|
||||
/* value is set so just return */
|
||||
return;
|
||||
}else{
|
||||
fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
|
||||
exit(U_ILLEGAL_CHAR_FOUND);
|
||||
}
|
||||
}
|
||||
|
||||
if(adjustedLen == 1){
|
||||
/* calculate the delta */
|
||||
int16_t delta = (int32_t)codepoint - (int16_t) mapping[0];
|
||||
if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){
|
||||
|
||||
trieWord = delta << 2;
|
||||
|
||||
|
||||
/* make sure that the second bit is OFF */
|
||||
if((trieWord & 0x02) != 0 ){
|
||||
fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n");
|
||||
exit(U_INTERNAL_PROGRAM_ERROR);
|
||||
}
|
||||
/* make sure that the value of trieWord is less than the threshold */
|
||||
if(trieWord < _SPREP_TYPE_THRESHOLD){
|
||||
/* now set the value in the trie */
|
||||
if(!utrie_set32(sprepTrie,codepoint,trieWord)){
|
||||
fprintf(stderr,"Could not set the value for code point.\n");
|
||||
exit(U_ILLEGAL_ARGUMENT_ERROR);
|
||||
}
|
||||
/* value is set so just return */
|
||||
return;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* if the delta is not in the given range or if the trieWord is larger than the threshold
|
||||
* just fall through for storing the mapping in the mapping table
|
||||
*/
|
||||
}
|
||||
|
||||
map = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (adjustedLen+1));
|
||||
uprv_memset(map,0,U_SIZEOF_UCHAR * (adjustedLen+1));
|
||||
|
||||
i=0;
|
||||
|
||||
while(i<length){
|
||||
if(mapping[i] <= 0xFFFF){
|
||||
map[i] = (uint16_t)mapping[i];
|
||||
}else{
|
||||
map[i] = UTF16_LEAD(mapping[i]);
|
||||
map[i+1] = UTF16_TRAIL(mapping[i]);
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct));
|
||||
value->mapping = map;
|
||||
value->type = type;
|
||||
value->length = adjustedLen;
|
||||
if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){
|
||||
mappingDataCapacity++;
|
||||
}
|
||||
if(maxLength < value->length){
|
||||
maxLength = value->length;
|
||||
}
|
||||
uhash_iput(hashTable,codepoint,value,status);
|
||||
mappingDataCapacity += adjustedLen;
|
||||
|
||||
if(U_FAILURE(*status)){
|
||||
fprintf(stderr, "Failed to put entries into the hastable. Error: %s\n", u_errorName(*status));
|
||||
exit(*status);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
extern void
|
||||
storeRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status){
|
||||
uint16_t trieWord = 0;
|
||||
uint32_t i=0;
|
||||
|
||||
trieWord += (_SPREP_TYPE_THRESHOLD + type); /* the top 4 bits contain the value */
|
||||
if(trieWord > 0xFFFF){
|
||||
fprintf(stderr,"trieWord cannot contain value greater than 0xFFFF.\n");
|
||||
exit(U_ILLEGAL_CHAR_FOUND);
|
||||
}
|
||||
if(start == end){
|
||||
uint32_t savedTrieWord = utrie_get32(sprepTrie, start, NULL);
|
||||
if(savedTrieWord>0){
|
||||
if(savedTrieWord < _SPREP_TYPE_THRESHOLD && type == USPREP_PROHIBITED){
|
||||
/*
|
||||
* A mapping is stored in the trie word
|
||||
* and the only other possible type that a
|
||||
* code point can have is USPREP_PROHIBITED
|
||||
*
|
||||
*/
|
||||
|
||||
/* turn on the 0th bit in the savedTrieWord */
|
||||
savedTrieWord += 0x01;
|
||||
|
||||
/* the downcast is safe since we only save 16 bit values */
|
||||
trieWord = (uint16_t)savedTrieWord;
|
||||
|
||||
/* make sure that the value of trieWord is less than the threshold */
|
||||
if(trieWord < _SPREP_TYPE_THRESHOLD){
|
||||
/* now set the value in the trie */
|
||||
if(!utrie_set32(sprepTrie,start,trieWord)){
|
||||
fprintf(stderr,"Could not set the value for code point.\n");
|
||||
exit(U_ILLEGAL_ARGUMENT_ERROR);
|
||||
}
|
||||
/* value is set so just return */
|
||||
return;
|
||||
}else{
|
||||
fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
|
||||
exit(U_ILLEGAL_CHAR_FOUND);
|
||||
}
|
||||
|
||||
}else if(savedTrieWord != trieWord){
|
||||
fprintf(stderr,"Value for codepoint \\U%08X already set!.\n", start);
|
||||
exit(U_ILLEGAL_ARGUMENT_ERROR);
|
||||
}
|
||||
/* if savedTrieWord == trieWord .. fall through and set the value */
|
||||
}
|
||||
if(!utrie_set32(sprepTrie,start,trieWord)){
|
||||
fprintf(stderr,"Could not set the value for code point \\U%08X.\n", start);
|
||||
exit(U_ILLEGAL_ARGUMENT_ERROR);
|
||||
}
|
||||
}else{
|
||||
if(!utrie_setRange32(sprepTrie, start, end+1, trieWord, FALSE)){
|
||||
fprintf(stderr,"Value for certain codepoint already set.\n");
|
||||
exit(U_ILLEGAL_CHAR_FOUND);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* folding value: just store the offset (16 bits) if there is any non-0 entry */
|
||||
static uint32_t U_CALLCONV
|
||||
getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
|
||||
uint32_t foldedValue, value;
|
||||
UChar32 limit=0;
|
||||
UBool inBlockZero;
|
||||
|
||||
foldedValue=0;
|
||||
|
||||
limit=start+0x400;
|
||||
while(start<limit) {
|
||||
value=utrie_get32(trie, start, &inBlockZero);
|
||||
if(inBlockZero) {
|
||||
start+=UTRIE_DATA_BLOCK_LENGTH;
|
||||
} else if(value!=0) {
|
||||
return (uint32_t)offset;
|
||||
} else {
|
||||
++start;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_IDNA */
|
||||
|
||||
extern void
|
||||
generateData(const char *dataDir, const char *packageName, const char* bundleName) {
|
||||
static uint8_t sprepTrieBlock[100000];
|
||||
|
||||
UNewDataMemory *pData;
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
int32_t size, dataLength;
|
||||
char* fileName = (char*) uprv_malloc(uprv_strlen(bundleName) +100);
|
||||
|
||||
#if UCONFIG_NO_IDNA
|
||||
|
||||
size=0;
|
||||
|
||||
#else
|
||||
|
||||
int32_t sprepTrieSize;
|
||||
|
||||
/* sort and add mapping data */
|
||||
storeMappingData();
|
||||
|
||||
sprepTrieSize=utrie_serialize(sprepTrie, sprepTrieBlock, sizeof(sprepTrieBlock), getFoldedValue, TRUE, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "error: utrie_serialize(sprep trie) failed, %s\n", u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
|
||||
size = sprepTrieSize + mappingDataCapacity*U_SIZEOF_UCHAR + sizeof(indexes);
|
||||
if(beVerbose) {
|
||||
printf("size of sprep trie %5u bytes\n", sprepTrieSize);
|
||||
printf("size of " U_ICUDATA_NAME "_%s." DATA_TYPE " contents: %ld bytes\n", bundleName,(long)size);
|
||||
printf("size of mapping data array %5u bytes\n",mappingDataCapacity * U_SIZEOF_UCHAR);
|
||||
printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex);
|
||||
printf("Maximum length of the mapping string is : %i \n", maxLength);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
uprv_strcpy(fileName,packageName);
|
||||
uprv_strcat(fileName,"_");
|
||||
uprv_strcat(fileName,bundleName);
|
||||
/* write the data */
|
||||
pData=udata_create(dataDir, DATA_TYPE, fileName, &dataInfo,
|
||||
haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "gensprep: unable to create the output file, error %d\n", errorCode);
|
||||
exit(errorCode);
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
indexes[_SPREP_INDEX_TRIE_SIZE]=sprepTrieSize;
|
||||
indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]=mappingDataCapacity*U_SIZEOF_UCHAR;
|
||||
|
||||
udata_writeBlock(pData, indexes, sizeof(indexes));
|
||||
udata_writeBlock(pData, sprepTrieBlock, sprepTrieSize);
|
||||
udata_writeBlock(pData, mappingData, indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]);
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
/* finish up */
|
||||
dataLength=udata_finish(pData, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "gensprep: error %d writing the output file\n", errorCode);
|
||||
exit(errorCode);
|
||||
}
|
||||
|
||||
if(dataLength!=size) {
|
||||
fprintf(stderr, "gensprep error: data length %ld != calculated size %ld\n",
|
||||
(long)dataLength, (long)size);
|
||||
exit(U_INTERNAL_PROGRAM_ERROR);
|
||||
}
|
||||
|
||||
/* done with writing the data .. close the hashtable */
|
||||
uhash_close(hashTable);
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
extern void
|
||||
cleanUpData(void) {
|
||||
|
||||
utrie_close(sprepTrie);
|
||||
uprv_free(sprepTrie);
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_IDNA */
|
||||
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
Loading…
Add table
Reference in a new issue