mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-20 20:19:32 +00:00
ICU-3064 Add stringPrep tests to TestAll
X-SVN-Rev: 12959
This commit is contained in:
parent
4d18bb2b4f
commit
fe2e828dca
8 changed files with 213 additions and 82 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/TestAll.java,v $
|
||||
* $Date: 2003/06/03 18:49:28 $
|
||||
* $Revision: 1.50 $
|
||||
* $Date: 2003/08/27 03:07:43 $
|
||||
* $Revision: 1.51 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -40,7 +40,8 @@ public class TestAll extends TestGroup {
|
|||
"com.ibm.icu.dev.test.util.TestAll",
|
||||
"com.ibm.icu.dev.test.iterator.TestUCharacterIterator", // not a group
|
||||
"com.ibm.icu.dev.test.bigdec.DiagBigDecimal", // not a group
|
||||
"com.ibm.icu.dev.test.impl.TestAll"
|
||||
"com.ibm.icu.dev.test.impl.TestAll",
|
||||
"com.ibm.icu.dev.test.stringprep.TestAll"
|
||||
},
|
||||
"All tests in ICU");
|
||||
}
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/NFS4StringPrep.java,v $
|
||||
* $Date: 2003/08/21 23:42:25 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2003/08/27 03:08:29 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -44,11 +44,7 @@ public final class NFS4StringPrep {
|
|||
//singleton instance
|
||||
private static NFS4StringPrep prep = null;
|
||||
|
||||
// we donot synchronize the constructor because we
|
||||
// know that the constructor is only called from
|
||||
// getInstance method if and only if the the singleton
|
||||
// intance is null, which means this constructor is called
|
||||
// only once
|
||||
|
||||
private NFS4StringPrep ()throws IOException{
|
||||
|
||||
InputStream nfscssFile = TestUtil.getDataStream(NFS4DataFileNames[0]);
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/TestStringPrep.java,v $
|
||||
* $Date: 2003/08/21 23:42:21 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2003/08/27 03:08:29 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -17,8 +17,6 @@ import com.ibm.icu.dev.test.TestFmwk;
|
|||
/**
|
||||
* @author ram
|
||||
*
|
||||
* To change the template for this generated type comment go to
|
||||
* Window>Preferences>Java>Code Generation>Code and Comments
|
||||
*/
|
||||
public class TestStringPrep extends TestFmwk {
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
@ -152,11 +150,7 @@ public class TestStringPrep extends TestFmwk {
|
|||
|
||||
}
|
||||
}
|
||||
private static String[] cs_prep_data = {
|
||||
//BIDI checking is turned off .. so
|
||||
"\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774\u0644\u064A\u0647\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74",
|
||||
|
||||
};
|
||||
public void TestCSPrep(){
|
||||
|
||||
// Checking for bidi is turned off
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/IDNA.java,v $
|
||||
* $Date: 2003/08/21 23:40:42 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2003/08/27 03:09:08 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -19,7 +19,7 @@ import com.ibm.icu.text.UCharacterIterator;
|
|||
|
||||
/**
|
||||
*
|
||||
* UIDNA API implements the IDNA protocol as defined in the IDNA draft
|
||||
* IDNA API implements the IDNA protocol as defined in the IDNA draft
|
||||
* (http://www.ietf.org/rfc/rfc3490.txt).
|
||||
* The draft defines 2 operations: ToASCII and ToUnicode. Domain labels
|
||||
* containing non-ASCII code points are required to be processed by
|
||||
|
@ -38,7 +38,8 @@ import com.ibm.icu.text.UCharacterIterator;
|
|||
* once.
|
||||
* ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
|
||||
* ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string).
|
||||
*
|
||||
*
|
||||
* @author Ram Viswanadha
|
||||
*/
|
||||
public final class IDNA {
|
||||
|
||||
|
@ -59,26 +60,27 @@ public final class IDNA {
|
|||
* do not check if the input conforms to STD-3 ASCII rules.
|
||||
*
|
||||
* @see convertToASCII convertToUnicode
|
||||
* @draft ICU 2.6
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static final int DEFAULT = 0x0000;
|
||||
/**
|
||||
* Option to allow processing of unassigned codepoints in the input
|
||||
*
|
||||
* @see convertToASCII convertToUnicode
|
||||
* @draft ICU 2.6
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static final int ALLOW_UNASSIGNED = 0x0001;
|
||||
/**
|
||||
* Option to check if input conforms to STD-3 ASCII rules
|
||||
*
|
||||
* @see convertToASCII convertToUnicode
|
||||
* @draft ICU 2.6
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static final int USE_STD3_RULES = 0x0002;
|
||||
|
||||
private static StringPrep prep = null;
|
||||
|
||||
|
||||
private static synchronized void loadInstance()
|
||||
throws IOException{
|
||||
if(prep==null){
|
||||
|
@ -187,6 +189,9 @@ public final class IDNA {
|
|||
return false;
|
||||
}
|
||||
|
||||
/* private constructor to prevent construction of the object */
|
||||
private IDNA(){}
|
||||
|
||||
/**
|
||||
* This function implements the ToASCII operation as defined in the IDNA RFC.
|
||||
* This operation is done on <b>single labels</b> before sending it to something that expects
|
||||
|
@ -380,7 +385,7 @@ public final class IDNA {
|
|||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* set will apply to all labels in the domain name
|
||||
*
|
||||
* @param src The input string as UCharacterIterator to be processed
|
||||
* @param iter The input string as UCharacterIterator to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
|
@ -566,7 +571,7 @@ public final class IDNA {
|
|||
* separated by dots; for e.g." "www.example.com" is composed of 3 labels
|
||||
* "www","example", and "com".
|
||||
*
|
||||
* @param src The input string as UCharacterIterator to be processed
|
||||
* @param iter The input string as UCharacterIterator to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
|
@ -690,7 +695,7 @@ public final class IDNA {
|
|||
* and then convert. This function does not offer that level of granularity. The options once
|
||||
* set will apply to all labels in the domain name
|
||||
*
|
||||
* @param src The input string as UCharacterIterator to be processed
|
||||
* @param iter The input string as UCharacterIterator to be processed
|
||||
* @param options A bit set of options:
|
||||
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
|
||||
* and do not use STD3 ASCII rules
|
||||
|
|
|
@ -4,48 +4,140 @@
|
|||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/ParseException.java,v $
|
||||
* $Date: 2003/08/21 23:40:39 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2003/08/27 03:09:08 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.stringprep;
|
||||
|
||||
/**
|
||||
* @author ram
|
||||
* Exception that signals an error has occurred while parsing the
|
||||
* input to StringPrep or IDNA.
|
||||
*
|
||||
* To change the template for this generated type comment go to
|
||||
* Window>Preferences>Java>Code Generation>Code and Comments
|
||||
* @author Ram Viswanadha
|
||||
*/
|
||||
public class ParseException extends Exception {
|
||||
|
||||
/**
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static final int INVALID_CHAR_FOUND = 0;
|
||||
/**
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static final int ILLEGAL_CHAR_FOUND = 1;
|
||||
/**
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static final int PROHIBITED_ERROR = 2;
|
||||
/**
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static final int UNASSIGNED_ERROR = 3;
|
||||
/**
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static final int CHECK_BIDI_ERROR = 4;
|
||||
/**
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static final int STD3_ASCII_RULES_ERROR = 5;
|
||||
/**
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static final int ACE_PREFIX_ERROR = 6;
|
||||
/**
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static final int VERIFICATION_ERROR = 7;
|
||||
/**
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static final int LABEL_TOO_LONG_ERROR = 8;
|
||||
/**
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public static final int BUFFER_OVERFLOW_ERROR = 9;
|
||||
|
||||
/**
|
||||
* Construct a ParseException object with the given message
|
||||
* and error code
|
||||
*
|
||||
* @param message A string describing the type of error that occurred
|
||||
* @param error The error that has occurred
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public ParseException(String message,int error){
|
||||
super(message);
|
||||
this.error = error;
|
||||
this.offset = -1;
|
||||
this.line = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a ParseException object with the given message and
|
||||
* error code
|
||||
*
|
||||
* @param message A string describing the type of error that occurred
|
||||
* @param error The error that has occurred
|
||||
* @param rules The input rules string
|
||||
* @param pos The position of error in the rules string
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public ParseException(String message,int error, String rules, int pos){
|
||||
super(message);
|
||||
this.error = error;
|
||||
setContext(rules,pos);
|
||||
setContext(rules,pos);
|
||||
this.offset = -1;
|
||||
this.line = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a ParseException object with the given message and error code
|
||||
*
|
||||
* @param message A string describing the type of error that occurred
|
||||
* @param error The error that has occurred
|
||||
* @param rules The input rules string
|
||||
* @param pos The position of error in the rules string
|
||||
* @param offset The character offset to the error. If the line field is
|
||||
* being used, then this offset is from the start of the line.
|
||||
* If the line field is not being used, then this offset is from
|
||||
* the start of the text. The default value of this field
|
||||
* is -1.
|
||||
* @param lineNumber The line number at which the error has occurred.
|
||||
* If the parse engine is not using this field, it should set it to zero. Otherwise
|
||||
* it should be a positive integer. The default value of this field
|
||||
* is -1. It will be set to 0 if the code populating this struct is not
|
||||
* using line numbers.
|
||||
*/
|
||||
public ParseException(String message, int error, String rules, int pos, int offset, int lineNumber){
|
||||
super(message);
|
||||
this.error = error;
|
||||
setContext(rules,pos);
|
||||
this.offset = offset;
|
||||
this.line = lineNumber;
|
||||
}
|
||||
/**
|
||||
* Compare this ParseException to another and evaluate if they are equal.
|
||||
* The comparison works only on the type of error and does not compare
|
||||
* the rules strings, if any, for equality.
|
||||
*
|
||||
* @param other The exception that this object should be compared to
|
||||
* @return true if the objects are equal, false if unequal
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public boolean equals(Object other){
|
||||
if(!(other instanceof ParseException)){
|
||||
return false;
|
||||
}
|
||||
return ((ParseException)other).error == this.error;
|
||||
|
||||
}
|
||||
/**
|
||||
* Returns the position of error in the rules string
|
||||
*
|
||||
* @return String
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public String toString(){
|
||||
StringBuffer buf = new StringBuffer();
|
||||
buf.append(super.getMessage());
|
||||
|
@ -58,13 +150,14 @@ public class ParseException extends Exception {
|
|||
}
|
||||
|
||||
private int error;
|
||||
|
||||
/**
|
||||
* The line on which the error occurred. If the parse engine
|
||||
* is not using this field, it should set it to zero. Otherwise
|
||||
* it should be a positive integer. The default value of this field
|
||||
* is -1. It will be set to 0 if the code populating this struct is not
|
||||
* using line numbers.
|
||||
* @stable ICU 2.0
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
private int line;
|
||||
|
||||
|
@ -75,68 +168,51 @@ public class ParseException extends Exception {
|
|||
* the start of the text.The default value of this field
|
||||
* is -1. It will be set to appropriate value by the code that
|
||||
* populating the struct.
|
||||
* @stable ICU 2.0
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
private int offset;
|
||||
|
||||
/**
|
||||
* Textual context before the error. Null-terminated.
|
||||
* May be the empty string if not implemented by parser.
|
||||
* @stable ICU 2.0
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
private StringBuffer preContext = new StringBuffer();
|
||||
|
||||
/**
|
||||
* Textual context after the error. Null-terminated.
|
||||
* May be the empty string if not implemented by parser.
|
||||
* @stable ICU 2.0
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
private StringBuffer postContext = new StringBuffer();
|
||||
|
||||
public static final int PARSE_CONTEXT_LEN = 16;
|
||||
|
||||
public void setOffset(int offset){
|
||||
this.offset = offset;
|
||||
}
|
||||
public int getOffset(){
|
||||
return offset;
|
||||
}
|
||||
public int getLineNumber(){
|
||||
return line;
|
||||
}
|
||||
public int setLineNumber(int lineNumber){
|
||||
return line;
|
||||
}
|
||||
public String getPreContext(){
|
||||
return preContext.toString();
|
||||
}
|
||||
public String getPostContext(){
|
||||
return postContext.toString();
|
||||
}
|
||||
|
||||
|
||||
public void setPreContext(String str, int pos){
|
||||
private void setPreContext(String str, int pos){
|
||||
setPreContext(str.toCharArray(),pos);
|
||||
}
|
||||
public void setPreContext(char[] str, int pos){
|
||||
|
||||
private void setPreContext(char[] str, int pos){
|
||||
int start = (pos <= PARSE_CONTEXT_LEN)? 0 : (pos - (PARSE_CONTEXT_LEN-1));
|
||||
int len = (start <= PARSE_CONTEXT_LEN)? start : PARSE_CONTEXT_LEN;
|
||||
preContext.append(str,start,len);
|
||||
|
||||
}
|
||||
public void setPostContext(String str, int pos){
|
||||
|
||||
private void setPostContext(String str, int pos){
|
||||
setPostContext(str.toCharArray(),pos);
|
||||
}
|
||||
public void setPostContext(char[] str, int pos){
|
||||
|
||||
private void setPostContext(char[] str, int pos){
|
||||
int start = pos;
|
||||
int len = str.length - start;
|
||||
postContext.append(str,start,len);
|
||||
|
||||
}
|
||||
public void setContext(char[]str,int pos){
|
||||
setPreContext(str,pos);
|
||||
setPostContext(str,pos);
|
||||
}
|
||||
public void setContext(String str,int pos){
|
||||
|
||||
private void setContext(String str,int pos){
|
||||
setPreContext(str,pos);
|
||||
setPostContext(str,pos);
|
||||
}
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/Punycode.java,v $
|
||||
* $Date: 2003/08/21 23:40:39 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2003/08/27 03:09:08 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -16,13 +16,10 @@ import com.ibm.icu.lang.UCharacter;
|
|||
import com.ibm.icu.text.UTF16;
|
||||
|
||||
/**
|
||||
* Ported code from ICU punycode.c
|
||||
* @author ram
|
||||
*
|
||||
* To change this generated comment edit the template variable "typecomment":
|
||||
* Window>Preferences>Java>Templates.
|
||||
* To enable and disable the creation of type comments go to
|
||||
* Window>Preferences>Java>Code Generation.
|
||||
*/
|
||||
|
||||
/* Package Private class */
|
||||
final class Punycode {
|
||||
|
||||
|
@ -131,7 +128,16 @@ final class Punycode {
|
|||
return (char)((ZERO-26)+digit);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts Unicode to Punycode.
|
||||
* The input string must not contain single, unpaired surrogates.
|
||||
* The output will be represented as an array of ASCII code points.
|
||||
*
|
||||
* @param src
|
||||
* @param caseFlags
|
||||
* @return
|
||||
* @throws ParseException
|
||||
*/
|
||||
public static StringBuffer encode(StringBuffer src, boolean[] caseFlags) throws ParseException{
|
||||
|
||||
int[] cpBuffer = new int[MAX_CP_COUNT];
|
||||
|
@ -283,6 +289,15 @@ final class Punycode {
|
|||
private static boolean isSurrogate(int ch){
|
||||
return (((ch)&0xfffff800)==0xd800);
|
||||
}
|
||||
/**
|
||||
* Converts Punycode to Unicode.
|
||||
* The Unicode string will be at most as long as the Punycode string.
|
||||
*
|
||||
* @param src
|
||||
* @param caseFlags
|
||||
* @return
|
||||
* @throws ParseException
|
||||
*/
|
||||
public static StringBuffer decode(StringBuffer src, boolean[] caseFlags)
|
||||
throws ParseException{
|
||||
int srcLength = src.length();
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/StringPrep.java,v $
|
||||
* $Date: 2003/08/21 23:40:41 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2003/08/27 03:09:08 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -27,12 +27,37 @@ import com.ibm.icu.lang.UCharacter;
|
|||
import com.ibm.icu.lang.UCharacterDirection;
|
||||
|
||||
/**
|
||||
* @author ram
|
||||
*
|
||||
* To change the template for this generated type comment go to
|
||||
* Window>Preferences>Java>Code Generation>Code and Comments
|
||||
* StringPrep API implements the StringPrep framework as described by RFC 3454.
|
||||
* StringPrep prepares Unicode strings for use in network protocols.
|
||||
* Profiles of StringPrep are sets of rules and data according to which the
|
||||
* Unicode Strings are prepared. Each profile contains tables which describe
|
||||
* how a code point should be treated. The tables are broadly classified into
|
||||
* <ul>
|
||||
* <li> Unassigned Table: Contains code points that are unassigned
|
||||
* in the Unicode Version supported by StringPrep. Currently
|
||||
* RFC 3454 supports Unicode 3.2. </li>
|
||||
* <li> Prohibited Table: Contains code points that are prohibited from
|
||||
* the output of the StringPrep processing function. </li>
|
||||
* <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li>
|
||||
* </ul>
|
||||
*
|
||||
* The procedure for preparing Unicode strings:
|
||||
* <ol>
|
||||
* <li> Map: For each character in the input, check if it has a mapping
|
||||
* and, if so, replace it with its mapping. </li>
|
||||
* <li> Normalize: Possibly normalize the result of step 1 using Unicode
|
||||
* normalization. </li>
|
||||
* <li> Prohibit: Check for any characters that are not allowed in the
|
||||
* output. If any are found, return an error.</li>
|
||||
* <li> Check bidi: Possibly check for right-to-left characters, and if
|
||||
* any are found, make sure that the whole string satisfies the
|
||||
* requirements for bidirectional strings. If the string does not
|
||||
* satisfy the requirements for bidirectional strings, return an
|
||||
* error. </li>
|
||||
* </ol>
|
||||
* @author Ram Viswanadha
|
||||
*/
|
||||
public class StringPrep {
|
||||
public final class StringPrep {
|
||||
/**
|
||||
* Option to prohibit processing of unassigned code points in the input
|
||||
*
|
||||
|
|
19
icu4j/src/com/ibm/icu/stringprep/package.html
Normal file
19
icu4j/src/com/ibm/icu/stringprep/package.html
Normal file
|
@ -0,0 +1,19 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
|
||||
<html>
|
||||
|
||||
<head><!-- Copyright (C) 2003, International Business Machines Corporation and
|
||||
others. All Rights Reserved.
|
||||
|
||||
$Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/package.html,v $
|
||||
$Revision: 1.1 $
|
||||
$Date: 2003/08/27 03:09:08 $
|
||||
-->
|
||||
|
||||
<title>C:\cvs\icu4j\src\com\ibm\demo\package.html</title>
|
||||
</head>
|
||||
|
||||
<body bgcolor="white">
|
||||
|
||||
<p>StringPrep and IDNA support.</p>
|
||||
</body>
|
||||
</html>
|
Loading…
Add table
Reference in a new issue