ICU-3064 Add stringPrep tests to TestAll

X-SVN-Rev: 12959
This commit is contained in:
Ram Viswanadha 2003-08-27 03:09:08 +00:00
parent 4d18bb2b4f
commit fe2e828dca
8 changed files with 213 additions and 82 deletions

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/TestAll.java,v $
* $Date: 2003/06/03 18:49:28 $
* $Revision: 1.50 $
* $Date: 2003/08/27 03:07:43 $
* $Revision: 1.51 $
*
*****************************************************************************************
*/
@ -40,7 +40,8 @@ public class TestAll extends TestGroup {
"com.ibm.icu.dev.test.util.TestAll",
"com.ibm.icu.dev.test.iterator.TestUCharacterIterator", // not a group
"com.ibm.icu.dev.test.bigdec.DiagBigDecimal", // not a group
"com.ibm.icu.dev.test.impl.TestAll"
"com.ibm.icu.dev.test.impl.TestAll",
"com.ibm.icu.dev.test.stringprep.TestAll"
},
"All tests in ICU");
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/NFS4StringPrep.java,v $
* $Date: 2003/08/21 23:42:25 $
* $Revision: 1.1 $
* $Date: 2003/08/27 03:08:29 $
* $Revision: 1.2 $
*
*******************************************************************************
*/
@ -44,11 +44,7 @@ public final class NFS4StringPrep {
//singleton instance
private static NFS4StringPrep prep = null;
// we do not synchronize the constructor because we
// know that the constructor is only called from
// getInstance method if and only if the singleton
// instance is null, which means this constructor is called
// only once
private NFS4StringPrep ()throws IOException{
InputStream nfscssFile = TestUtil.getDataStream(NFS4DataFileNames[0]);

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/TestStringPrep.java,v $
* $Date: 2003/08/21 23:42:21 $
* $Revision: 1.1 $
* $Date: 2003/08/27 03:08:29 $
* $Revision: 1.2 $
*
*******************************************************************************
*/
@ -17,8 +17,6 @@ import com.ibm.icu.dev.test.TestFmwk;
/**
* @author ram
*
* To change the template for this generated type comment go to
* Window>Preferences>Java>Code Generation>Code and Comments
*/
public class TestStringPrep extends TestFmwk {
public static void main(String[] args) throws Exception {
@ -152,11 +150,7 @@ public class TestStringPrep extends TestFmwk {
}
}
private static String[] cs_prep_data = {
//BIDI checking is turned off .. so
"\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774\u0644\u064A\u0647\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74",
};
public void TestCSPrep(){
// Checking for bidi is turned off

View file

@ -4,8 +4,8 @@
* others. All Rights Reserved. *
*******************************************************************************
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/IDNA.java,v $
* $Date: 2003/08/21 23:40:42 $
* $Revision: 1.1 $
* $Date: 2003/08/27 03:09:08 $
* $Revision: 1.2 $
*
*****************************************************************************************
*/
@ -19,7 +19,7 @@ import com.ibm.icu.text.UCharacterIterator;
/**
*
* UIDNA API implements the IDNA protocol as defined in the IDNA draft
* IDNA API implements the IDNA protocol as defined in the IDNA draft
* (http://www.ietf.org/rfc/rfc3490.txt).
* The draft defines 2 operations: ToASCII and ToUnicode. Domain labels
* containing non-ASCII code points are required to be processed by
@ -38,7 +38,8 @@ import com.ibm.icu.text.UCharacterIterator;
* once.
* ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
* ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
*
*
* @author Ram Viswanadha
*/
public final class IDNA {
@ -59,26 +60,27 @@ public final class IDNA {
* do not check if the input conforms to STD-3 ASCII rules.
*
* @see convertToASCII convertToUnicode
* @draft ICU 2.6
* @draft ICU 2.8
*/
public static final int DEFAULT = 0x0000;
/**
* Option to allow processing of unassigned codepoints in the input
*
* @see convertToASCII convertToUnicode
* @draft ICU 2.6
* @draft ICU 2.8
*/
public static final int ALLOW_UNASSIGNED = 0x0001;
/**
* Option to check if input conforms to STD-3 ASCII rules
*
* @see convertToASCII convertToUnicode
* @draft ICU 2.6
* @draft ICU 2.8
*/
public static final int USE_STD3_RULES = 0x0002;
private static StringPrep prep = null;
private static synchronized void loadInstance()
throws IOException{
if(prep==null){
@ -187,6 +189,9 @@ public final class IDNA {
return false;
}
/* private constructor to prevent construction of the object */
private IDNA(){}
/**
* This function implements the ToASCII operation as defined in the IDNA RFC.
* This operation is done on <b>single labels</b> before sending it to something that expects
@ -380,7 +385,7 @@ public final class IDNA {
* and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param src The input string as UCharacterIterator to be processed
* @param iter The input string as UCharacterIterator to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
@ -566,7 +571,7 @@ public final class IDNA {
* separated by dots; e.g., "www.example.com" is composed of 3 labels
* "www","example", and "com".
*
* @param src The input string as UCharacterIterator to be processed
* @param iter The input string as UCharacterIterator to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules
@ -690,7 +695,7 @@ public final class IDNA {
* and then convert. This function does not offer that level of granularity. The options once
* set will apply to all labels in the domain name
*
* @param src The input string as UCharacterIterator to be processed
* @param iter The input string as UCharacterIterator to be processed
* @param options A bit set of options:
* - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points
* and do not use STD3 ASCII rules

View file

@ -4,48 +4,140 @@
* others. All Rights Reserved. *
*******************************************************************************
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/ParseException.java,v $
* $Date: 2003/08/21 23:40:39 $
* $Revision: 1.1 $
* $Date: 2003/08/27 03:09:08 $
* $Revision: 1.2 $
*
*****************************************************************************************
*/
package com.ibm.icu.stringprep;
/**
* @author ram
* Exception that signals an error has occurred while parsing the
* input to StringPrep or IDNA.
*
* To change the template for this generated type comment go to
* Window>Preferences>Java>Code Generation>Code and Comments
* @author Ram Viswanadha
*/
public class ParseException extends Exception {
/**
* @draft ICU 2.8
*/
public static final int INVALID_CHAR_FOUND = 0;
/**
* @draft ICU 2.8
*/
public static final int ILLEGAL_CHAR_FOUND = 1;
/**
* @draft ICU 2.8
*/
public static final int PROHIBITED_ERROR = 2;
/**
* @draft ICU 2.8
*/
public static final int UNASSIGNED_ERROR = 3;
/**
* @draft ICU 2.8
*/
public static final int CHECK_BIDI_ERROR = 4;
/**
* @draft ICU 2.8
*/
public static final int STD3_ASCII_RULES_ERROR = 5;
/**
* @draft ICU 2.8
*/
public static final int ACE_PREFIX_ERROR = 6;
/**
* @draft ICU 2.8
*/
public static final int VERIFICATION_ERROR = 7;
/**
* @draft ICU 2.8
*/
public static final int LABEL_TOO_LONG_ERROR = 8;
/**
* @draft ICU 2.8
*/
public static final int BUFFER_OVERFLOW_ERROR = 9;
/**
* Construct a ParseException object with the given message
* and error code
*
* @param message A string describing the type of error that occurred
* @param error The error that has occurred
* @draft ICU 2.8
*/
public ParseException(String message,int error){
super(message);
this.error = error;
this.offset = -1;
this.line = 0;
}
/**
* Construct a ParseException object with the given message and
* error code
*
* @param message A string describing the type of error that occurred
* @param error The error that has occurred
* @param rules The input rules string
* @param pos The position of error in the rules string
* @draft ICU 2.8
*/
public ParseException(String message,int error, String rules, int pos){
super(message);
this.error = error;
setContext(rules,pos);
setContext(rules,pos);
this.offset = -1;
this.line = 0;
}
/**
* Construct a ParseException object with the given message and error code
*
* @param message A string describing the type of error that occurred
* @param error The error that has occurred
* @param rules The input rules string
* @param pos The position of error in the rules string
* @param offset The character offset to the error. If the line field is
* being used, then this offset is from the start of the line.
* If the line field is not being used, then this offset is from
* the start of the text. The default value of this field
* is -1.
* @param lineNumber The line number at which the error has occurred.
* If the parse engine is not using line numbers, it should pass zero
* for this argument. Otherwise it should be a positive integer.
* The constructors that do not take a lineNumber argument set the
* line to 0.
*/
public ParseException(String message, int error, String rules, int pos, int offset, int lineNumber){
super(message);
this.error = error;
setContext(rules,pos);
this.offset = offset;
this.line = lineNumber;
}
/**
* Compare this ParseException to another and evaluate if they are equal.
* The comparison works only on the type of error and does not compare
* the rules strings, if any, for equality.
*
* @param other The exception that this object should be compared to
* @return true if the objects are equal, false if unequal
* @draft ICU 2.8
*/
public boolean equals(Object other){
if(!(other instanceof ParseException)){
return false;
}
return ((ParseException)other).error == this.error;
}
/**
* Returns a string representation of this exception, beginning
* with its detail message.
*
* @return String
* @draft ICU 2.8
*/
public String toString(){
StringBuffer buf = new StringBuffer();
buf.append(super.getMessage());
@ -58,13 +150,14 @@ public class ParseException extends Exception {
}
private int error;
/**
* The line on which the error occurred. If the parse engine
* is not using this field, it should set it to zero. Otherwise
* it should be a positive integer. The constructors that do not
* take a line number set this field to 0, meaning that line
* numbers are not in use.
* @stable ICU 2.0
* @draft ICU 2.8
*/
private int line;
@ -75,68 +168,51 @@ public class ParseException extends Exception {
* the start of the text. The default value of this field
* is -1. It will be set to an appropriate value by the code that
* populates the struct.
* @stable ICU 2.0
* @draft ICU 2.8
*/
private int offset;
/**
* Textual context before the error. Null-terminated.
* May be the empty string if not implemented by parser.
* @stable ICU 2.0
* @draft ICU 2.8
*/
private StringBuffer preContext = new StringBuffer();
/**
* Textual context after the error. Null-terminated.
* May be the empty string if not implemented by parser.
* @stable ICU 2.0
* @draft ICU 2.8
*/
private StringBuffer postContext = new StringBuffer();
public static final int PARSE_CONTEXT_LEN = 16;
public void setOffset(int offset){
this.offset = offset;
}
public int getOffset(){
return offset;
}
public int getLineNumber(){
return line;
}
public int setLineNumber(int lineNumber){
return line;
}
public String getPreContext(){
return preContext.toString();
}
public String getPostContext(){
return postContext.toString();
}
public void setPreContext(String str, int pos){
private void setPreContext(String str, int pos){
setPreContext(str.toCharArray(),pos);
}
public void setPreContext(char[] str, int pos){
private void setPreContext(char[] str, int pos){
int start = (pos <= PARSE_CONTEXT_LEN)? 0 : (pos - (PARSE_CONTEXT_LEN-1));
int len = (start <= PARSE_CONTEXT_LEN)? start : PARSE_CONTEXT_LEN;
preContext.append(str,start,len);
}
public void setPostContext(String str, int pos){
private void setPostContext(String str, int pos){
setPostContext(str.toCharArray(),pos);
}
public void setPostContext(char[] str, int pos){
private void setPostContext(char[] str, int pos){
int start = pos;
int len = str.length - start;
postContext.append(str,start,len);
}
public void setContext(char[]str,int pos){
setPreContext(str,pos);
setPostContext(str,pos);
}
public void setContext(String str,int pos){
private void setContext(String str,int pos){
setPreContext(str,pos);
setPostContext(str,pos);
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/Punycode.java,v $
* $Date: 2003/08/21 23:40:39 $
* $Revision: 1.1 $
* $Date: 2003/08/27 03:09:08 $
* $Revision: 1.2 $
*
*****************************************************************************************
*/
@ -16,13 +16,10 @@ import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;
/**
* Ported code from ICU punycode.c
* @author ram
*
* To change this generated comment edit the template variable "typecomment":
* Window>Preferences>Java>Templates.
* To enable and disable the creation of type comments go to
* Window>Preferences>Java>Code Generation.
*/
/* Package Private class */
final class Punycode {
@ -131,7 +128,16 @@ final class Punycode {
return (char)((ZERO-26)+digit);
}
}
/**
* Converts Unicode to Punycode.
* The input string must not contain single, unpaired surrogates.
* The output will be represented as an array of ASCII code points.
*
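* @param src The Unicode text to be converted to Punycode
* @param caseFlags Case flags for the input; presumably one flag per
*                  input character, as in ICU's punycode.c (TODO confirm)
* @return The Punycode form of src, consisting of ASCII code points
* @throws ParseException if the conversion fails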
*/
public static StringBuffer encode(StringBuffer src, boolean[] caseFlags) throws ParseException{
int[] cpBuffer = new int[MAX_CP_COUNT];
@ -283,6 +289,15 @@ final class Punycode {
private static boolean isSurrogate(int ch){
return (((ch)&0xfffff800)==0xd800);
}
/**
* Converts Punycode to Unicode.
* The Unicode string will be at most as long as the Punycode string.
*
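* @param src The Punycode text to be converted to Unicode
* @param caseFlags Case flags for the conversion; presumably with the
*                  same meaning as in ICU's punycode.c (TODO confirm)
* @return The Unicode form of src
* @throws ParseException if the conversion fails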
*/
public static StringBuffer decode(StringBuffer src, boolean[] caseFlags)
throws ParseException{
int srcLength = src.length();

View file

@ -4,8 +4,8 @@
* others. All Rights Reserved. *
*******************************************************************************
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/StringPrep.java,v $
* $Date: 2003/08/21 23:40:41 $
* $Revision: 1.1 $
* $Date: 2003/08/27 03:09:08 $
* $Revision: 1.2 $
*
*****************************************************************************************
*/
@ -27,12 +27,37 @@ import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UCharacterDirection;
/**
* @author ram
*
* To change the template for this generated type comment go to
* Window>Preferences>Java>Code Generation>Code and Comments
* StringPrep API implements the StringPrep framework as described by RFC 3454.
* StringPrep prepares Unicode strings for use in network protocols.
* Profiles of StringPrep are sets of rules and data according to which the
* Unicode strings are prepared. Each profile contains tables which describe
* how a code point should be treated. The tables are broadly classified into
* <ul>
* <li> Unassigned Table: Contains code points that are unassigned
* in the Unicode Version supported by StringPrep. Currently
* RFC 3454 supports Unicode 3.2. </li>
* <li> Prohibited Table: Contains code points that are prohibited from
* the output of the StringPrep processing function. </li>
* <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li>
* </ul>
* </ul>
*
* The procedure for preparing Unicode strings:
* <ol>
* <li> Map: For each character in the input, check if it has a mapping
* and, if so, replace it with its mapping. </li>
* <li> Normalize: Possibly normalize the result of step 1 using Unicode
* normalization. </li>
* <li> Prohibit: Check for any characters that are not allowed in the
* output. If any are found, return an error.</li>
* <li> Check bidi: Possibly check for right-to-left characters, and if
* any are found, make sure that the whole string satisfies the
* requirements for bidirectional strings. If the string does not
* satisfy the requirements for bidirectional strings, return an
* error. </li>
* </ol>
* @author Ram Viswanadha
*/
public class StringPrep {
public final class StringPrep {
/**
* Option to prohibit processing of unassigned code points in the input
*

View file

@ -0,0 +1,19 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head><!-- Copyright (C) 2003, International Business Machines Corporation and
others. All Rights Reserved.
$Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/package.html,v $
$Revision: 1.1 $
$Date: 2003/08/27 03:09:08 $
-->
<title>com.ibm.icu.stringprep package overview</title>
</head>
<body bgcolor="white">
<p>StringPrep and IDNA support.</p>
</body>
</html>