ICU-3064 Add stringPrep tests to TestAll

X-SVN-Rev: 12959
2025-04-20 20:19:32 +00:00 · 2003-08-27 03:09:08 +00:00 · 2003-08-27 03:09:08 +00:00 · fe2e828dca
commit fe2e828dca
parent 4d18bb2b4f
8 changed files with 213 additions and 82 deletions
--- a/icu4j/src/com/ibm/icu/dev/test/TestAll.java
+++ b/icu4j/src/com/ibm/icu/dev/test/TestAll.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/TestAll.java,v $
- * $Date: 2003/06/03 18:49:28 $
- * $Revision: 1.50 $
+ * $Date: 2003/08/27 03:07:43 $
+ * $Revision: 1.51 $
 *
 *****************************************************************************************
 */
@ -40,7 +40,8 @@ public class TestAll extends TestGroup {
                  "com.ibm.icu.dev.test.util.TestAll",
                  "com.ibm.icu.dev.test.iterator.TestUCharacterIterator", // not a group
                  "com.ibm.icu.dev.test.bigdec.DiagBigDecimal", // not a group
-                  "com.ibm.icu.dev.test.impl.TestAll"
+                  "com.ibm.icu.dev.test.impl.TestAll",
+                  "com.ibm.icu.dev.test.stringprep.TestAll"
              },
              "All tests in ICU");
    }
--- a/icu4j/src/com/ibm/icu/dev/test/stringprep/NFS4StringPrep.java
+++ b/icu4j/src/com/ibm/icu/dev/test/stringprep/NFS4StringPrep.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/NFS4StringPrep.java,v $
- * $Date: 2003/08/21 23:42:25 $
- * $Revision: 1.1 $
+ * $Date: 2003/08/27 03:08:29 $
+ * $Revision: 1.2 $
 *
 *******************************************************************************
 */
@ -44,11 +44,7 @@ public final class NFS4StringPrep {
    //singleton instance
    private static NFS4StringPrep prep = null;
    
-    // we donot synchronize the constructor because we
-    // know that the constructor is only called from
-    // getInstance method if and only if the the singleton
-    // intance is null, which means this constructor is called
-    // only once
+
    private  NFS4StringPrep ()throws IOException{
      
      InputStream  nfscssFile = TestUtil.getDataStream(NFS4DataFileNames[0]);
--- a/icu4j/src/com/ibm/icu/dev/test/stringprep/TestStringPrep.java
+++ b/icu4j/src/com/ibm/icu/dev/test/stringprep/TestStringPrep.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/TestStringPrep.java,v $
- * $Date: 2003/08/21 23:42:21 $
- * $Revision: 1.1 $
+ * $Date: 2003/08/27 03:08:29 $
+ * $Revision: 1.2 $
 *
 *******************************************************************************
 */
@ -17,8 +17,6 @@ import com.ibm.icu.dev.test.TestFmwk;
 /**
 * @author ram
 *
- * To change the template for this generated type comment go to
- * Window>Preferences>Java>Code Generation>Code and Comments
 */
 public class TestStringPrep extends TestFmwk {
    public static void main(String[] args) throws Exception {
@ -152,11 +150,7 @@ public class TestStringPrep extends TestFmwk {

        }
    }
-    private static String[] cs_prep_data = {
-        //BIDI checking is turned off .. so 
-        "\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774\u0644\u064A\u0647\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74",

-    };
    public void TestCSPrep(){
        
        // Checking for bidi is turned off
--- a/icu4j/src/com/ibm/icu/stringprep/IDNA.java
+++ b/icu4j/src/com/ibm/icu/stringprep/IDNA.java
@ -4,8 +4,8 @@
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/IDNA.java,v $
- * $Date: 2003/08/21 23:40:42 $
- * $Revision: 1.1 $ 
+ * $Date: 2003/08/27 03:09:08 $
+ * $Revision: 1.2 $ 
 *
 *****************************************************************************************
 */
@ -19,7 +19,7 @@ import com.ibm.icu.text.UCharacterIterator;

 /**
 *
- * UIDNA API implements the IDNA protocol as defined in the IDNA draft 
+ * IDNA API implements the IDNA protocol as defined in the IDNA draft 
 * (http://www.ietf.org/rfc/rfc3490.txt).
 * The draft defines 2 operations: ToASCII and ToUnicode. Domain labels 
 * containing non-ASCII code points are required to be processed by
@ -38,7 +38,8 @@ import com.ibm.icu.text.UCharacterIterator;
 * once.
 * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string) 
 * ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string).
- *
+ * 
+ * @author Ram Viswanadha
 */
 public final class IDNA {

@ -59,26 +60,27 @@ public final class IDNA {
     * do not check if the input conforms to STD-3 ASCII rules.
     * 
     * @see  convertToASCII convertToUnicode
-     * @draft ICU 2.6
+     * @draft ICU 2.8
     */
    public static final int DEFAULT             = 0x0000;
    /** 
     * Option to allow processing of unassigned codepoints in the input
     * 
     * @see  convertToASCII convertToUnicode
-     * @draft ICU 2.6
+     * @draft ICU 2.8
     */
    public static final int ALLOW_UNASSIGNED    = 0x0001;
    /** 
     * Option to check if input conforms to STD-3 ASCII rules
     * 
     * @see convertToASCII convertToUnicode
-     * @draft ICU 2.6
+     * @draft ICU 2.8
     */
    public static final int USE_STD3_RULES      = 0x0002;
    
    private static StringPrep prep  = null;
    
+  
    private static synchronized void loadInstance()
                                throws IOException{
        if(prep==null){
@ -187,6 +189,9 @@ public final class IDNA {
        return false;
    }
    
+    /* private constructor to prevent construction of the object */
+    private IDNA(){}
+    
    /**
     * This function implements the ToASCII operation as defined in the IDNA RFC.
     * This operation is done on <b>single labels</b> before sending it to something that expects
@ -380,7 +385,7 @@ public final class IDNA {
     * and then convert. This function does not offer that level of granularity. The options once  
     * set will apply to all labels in the domain name
     *
-     * @param src       The input string as UCharacterIterator to be processed
+     * @param iter      The input string as UCharacterIterator to be processed
     * @param options   A bit set of options:
     *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
     *                              and do not use STD3 ASCII rules
@ -566,7 +571,7 @@ public final class IDNA {
     * separated by dots; for e.g." "www.example.com" is composed of 3 labels 
     * "www","example", and "com".
     * 
-     * @param src       The input string as UCharacterIterator to be processed
+     * @param iter       The input string as UCharacterIterator to be processed
     * @param options   A bit set of options:
     *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
     *                              and do not use STD3 ASCII rules
@ -690,7 +695,7 @@ public final class IDNA {
     * and then convert. This function does not offer that level of granularity. The options once  
     * set will apply to all labels in the domain name
     *
-     * @param src       The input string as UCharacterIterator to be processed
+     * @param iter       The input string as UCharacterIterator to be processed
     * @param options   A bit set of options:
     *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
     *                              and do not use STD3 ASCII rules
--- a/icu4j/src/com/ibm/icu/stringprep/ParseException.java
+++ b/icu4j/src/com/ibm/icu/stringprep/ParseException.java
@ -4,48 +4,140 @@
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/ParseException.java,v $
- * $Date: 2003/08/21 23:40:39 $
- * $Revision: 1.1 $ 
+ * $Date: 2003/08/27 03:09:08 $
+ * $Revision: 1.2 $ 
 *
 *****************************************************************************************
 */
 package com.ibm.icu.stringprep;

 /**
- * @author ram
+ * Exception that signals an error has occurred while parsing the 
+ * input to StringPrep or IDNA. 
 *
- * To change the template for this generated type comment go to
- * Window>Preferences>Java>Code Generation>Code and Comments
+ * @author Ram Viswanadha
 */
 public class ParseException extends Exception {
-    
+    /**
+     * @draft ICU 2.8
+     */
    public static final int INVALID_CHAR_FOUND      = 0;
+    /**
+     * @draft ICU 2.8
+     */
    public static final int ILLEGAL_CHAR_FOUND      = 1;
+    /**
+     * @draft ICU 2.8
+     */
    public static final int PROHIBITED_ERROR        = 2;
+    /**
+     * @draft ICU 2.8
+     */
    public static final int UNASSIGNED_ERROR        = 3;
+    /**
+     * @draft ICU 2.8
+     */
    public static final int CHECK_BIDI_ERROR        = 4;
+    /**
+     * @draft ICU 2.8
+     */
    public static final int STD3_ASCII_RULES_ERROR  = 5;
+    /**
+     * @draft ICU 2.8
+     */
    public static final int ACE_PREFIX_ERROR        = 6;
+    /**
+     * @draft ICU 2.8
+     */
    public static final int VERIFICATION_ERROR      = 7;
+    /**
+     * @draft ICU 2.8
+     */
    public static final int LABEL_TOO_LONG_ERROR    = 8;
+    /**
+     * @draft ICU 2.8
+     */
    public static final int BUFFER_OVERFLOW_ERROR   = 9;
    
+    /**
+     * Construct a ParseException object with the given message
+     * and error code
+     * 
+     * @param message A string describing the type of error that occurred
+     * @param error   The error that has occurred
+     * @draft ICU 2.8
+     */
    public ParseException(String message,int error){
        super(message);
        this.error = error;
+        this.offset = -1;
+        this.line = 0;
    }
+    
+    /**
+     * Construct a ParseException object with the given message and
+     * error code
+     * 
+     * @param message A string describing the type of error that occurred
+     * @param error   The error that has occurred
+     * @param rules   The input rules string 
+     * @param pos     The position of error in the rules string
+     * @draft ICU 2.8
+     */
    public ParseException(String message,int error, String rules, int pos){
        super(message);
        this.error = error;
-        setContext(rules,pos);    
+        setContext(rules,pos);  
+        this.offset = -1;  
+        this.line = 0;
    }
-    
+    /**
+     * Construct  a ParseException object with the given message and error code
+     * 
+     * @param message    A string describing the type of error that occurred
+     * @param error      The error that has occurred
+     * @param rules      The input rules string 
+     * @param pos        The position of error in the rules string
+     * @param offset     The character offset to the error.  If the line field is
+     *                   being used, then this offset is from the start of the line.
+     *                   If the line field is not being used, then this offset is from
+     *                   the start of the text. The default value of this field
+     *                   is -1. 
+     * @param lineNumber The line number at which the error has occurred. 
+     *                   If the parse engine is not using this field, it should set it to zero.  Otherwise
+     *                   it should be a positive integer. The constructors of this class that
+     *                   do not take a line number set this field to 0, meaning that
+     *                   line numbers are not in use.
+     */
+    public ParseException(String message, int error, String rules, int pos, int offset, int lineNumber){
+        super(message);
+        this.error = error;
+        setContext(rules,pos);  
+        this.offset = offset;  
+        this.line = lineNumber;
+    }
+    /**
+     * Compare this ParseException to another and evaluate if they are equal.
+     * The comparison works only on the type of error and does not compare
+     * the rules strings, if any, for equality.
+     * 
+     * @param other The exception that this object should be compared to
+     * @return true if the objects are equal, false if unequal
+     * @draft ICU 2.8
+     */
    public boolean equals(Object other){
        if(!(other instanceof ParseException)){
            return false;
        }
        return ((ParseException)other).error == this.error;
+        
    }
+    /**
+     * Returns the position of error in the rules string
+     * 
+     * @return String
+     * @draft ICU 2.8
+     */
    public String toString(){
        StringBuffer buf = new StringBuffer();
        buf.append(super.getMessage());
@ -58,13 +150,14 @@ public class ParseException extends Exception {
    }

    private int error;
+    
    /**
     * The line on which the error occured.  If the parse engine
     * is not using this field, it should set it to zero.  Otherwise
     * it should be a positive integer. The default value of this field
     * is -1. It will be set to 0 if the code populating this struct is not
     * using line numbers.
-     * @stable ICU 2.0    
+     * @draft ICU 2.8  
     */
    private int line;

@ -75,68 +168,51 @@ public class ParseException extends Exception {
     * the start of the text.The default value of this field
     * is -1. It will be set to appropriate value by the code that 
     * populating the struct.
-     * @stable ICU 2.0   
+     * @draft ICU 2.8 
     */
    private int    offset;

    /**
     * Textual context before the error.  Null-terminated.
     * May be the empty string if not implemented by parser.
-     * @stable ICU 2.0   
+     * @draft ICU 2.8
     */
    private StringBuffer preContext = new StringBuffer();

    /**
     * Textual context after the error.  Null-terminated.
     * May be the empty string if not implemented by parser.
-     * @stable ICU 2.0   
+     * @draft ICU 2.8   
     */
    private StringBuffer postContext =  new StringBuffer();
    
    public static final int PARSE_CONTEXT_LEN = 16;
    
-    public void setOffset(int offset){
-        this.offset = offset;
-    }
-    public int getOffset(){
-        return offset;
-    }
-    public int getLineNumber(){
-        return line;
-    }
-    public int setLineNumber(int lineNumber){
-        return line;
-    }
-    public String getPreContext(){
-        return preContext.toString();
-    }
-    public String getPostContext(){
-        return postContext.toString();
-    }
+
    
-    public void setPreContext(String str, int pos){
+    private void setPreContext(String str, int pos){
        setPreContext(str.toCharArray(),pos);
    }
-    public void setPreContext(char[] str, int pos){
+    
+    private void setPreContext(char[] str, int pos){
        int start = (pos <= PARSE_CONTEXT_LEN)? 0 : (pos - (PARSE_CONTEXT_LEN-1));
        int len = (start <= PARSE_CONTEXT_LEN)? start : PARSE_CONTEXT_LEN;
        preContext.append(str,start,len);
 
    }
-    public void setPostContext(String str, int pos){
+    
+    private void setPostContext(String str, int pos){
        setPostContext(str.toCharArray(),pos);
    }
-    public void setPostContext(char[] str, int pos){
+    
+    private void setPostContext(char[] str, int pos){
        int start = pos;
        int len  = str.length - start; 
        postContext.append(str,start,len);

    }
-    public void setContext(char[]str,int pos){
-        setPreContext(str,pos);
-        setPostContext(str,pos);
-    }
-    public void setContext(String str,int pos){
+    
+    private void setContext(String str,int pos){
        setPreContext(str,pos);
        setPostContext(str,pos);
    }
--- a/icu4j/src/com/ibm/icu/stringprep/Punycode.java
+++ b/icu4j/src/com/ibm/icu/stringprep/Punycode.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/Punycode.java,v $ 
- * $Date: 2003/08/21 23:40:39 $ 
- * $Revision: 1.1 $
+ * $Date: 2003/08/27 03:09:08 $ 
+ * $Revision: 1.2 $
 *
 *****************************************************************************************
 */
@ -16,13 +16,10 @@ import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.text.UTF16;

 /**
+ * Ported code from ICU punycode.c 
 * @author ram
- *
- * To change this generated comment edit the template variable "typecomment":
- * Window>Preferences>Java>Templates.
- * To enable and disable the creation of type comments go to
- * Window>Preferences>Java>Code Generation.
 */
+
 /* Package Private class */
 final class Punycode {

@ -131,7 +128,16 @@ final class Punycode {
            return (char)((ZERO-26)+digit);
        }
    }
-    
+    /**
+     * Converts Unicode to Punycode.
+     * The input string must not contain single, unpaired surrogates.
+     * The output will be represented as an array of ASCII code points.
+     * 
+     * @param src
+     * @param caseFlags
+     * @return
+     * @throws ParseException
+     */
    public static StringBuffer encode(StringBuffer src, boolean[] caseFlags) throws ParseException{
 		
        int[] cpBuffer = new int[MAX_CP_COUNT];
@ -283,6 +289,15 @@ final class Punycode {
    private static boolean isSurrogate(int ch){
        return (((ch)&0xfffff800)==0xd800);
    }
+    /**
+     * Converts Punycode to Unicode.
+     * The Unicode string will be at most as long as the Punycode string.
+     * 
+     * @param src
+     * @param caseFlags
+     * @return
+     * @throws ParseException
+     */
    public static StringBuffer decode(StringBuffer src, boolean[] caseFlags) 
                               throws ParseException{
        int srcLength = src.length();
--- a/icu4j/src/com/ibm/icu/stringprep/StringPrep.java
+++ b/icu4j/src/com/ibm/icu/stringprep/StringPrep.java
@ -4,8 +4,8 @@
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/StringPrep.java,v $
- * $Date: 2003/08/21 23:40:41 $
- * $Revision: 1.1 $ 
+ * $Date: 2003/08/27 03:09:08 $
+ * $Revision: 1.2 $ 
 *
 *****************************************************************************************
 */
@ -27,12 +27,37 @@ import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.lang.UCharacterDirection;

 /**
- * @author ram
- *
- * To change the template for this generated type comment go to
- * Window>Preferences>Java>Code Generation>Code and Comments
+ * StringPrep API implements the StringPrep framework as described by RFC 3454.
+ * StringPrep prepares Unicode strings for use in network protocols.
+ * Profiles of StringPrep are sets of rules and data according to which the
+ * Unicode strings are prepared. Each profile contains tables which describe
+ * how a code point should be treated. The tables are broadly classified into
+ * <ul>
+ *     <li> Unassigned Table: Contains code points that are unassigned 
+ *          in the Unicode Version supported by StringPrep. Currently 
+ *          RFC 3454 supports Unicode 3.2. </li>
+ *     <li> Prohibited Table: Contains code points that are prohibited from
+ *          the output of the StringPrep processing function. </li>
+ *     <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li>
+ * </ul>
+ * 
+ * The procedure for preparing Unicode strings:
+ * <ol>
+ *      <li> Map: For each character in the input, check if it has a mapping
+ *           and, if so, replace it with its mapping. </li>
+ *      <li> Normalize: Possibly normalize the result of step 1 using Unicode
+ *           normalization. </li>
+ *      <li> Prohibit: Check for any characters that are not allowed in the
+ *        output.  If any are found, return an error.</li>
+ *      <li> Check bidi: Possibly check for right-to-left characters, and if
+ *           any are found, make sure that the whole string satisfies the
+ *           requirements for bidirectional strings.  If the string does not
+ *           satisfy the requirements for bidirectional strings, return an
+ *           error.  </li>
+ * </ol>
+ * @author Ram Viswanadha
 */
-public class StringPrep {
+public final class StringPrep {
    /** 
     * Option to prohibit processing of unassigned code points in the input
     * 
--- a/icu4j/src/com/ibm/icu/stringprep/package.html
+++ b/icu4j/src/com/ibm/icu/stringprep/package.html
@ -0,0 +1,19 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+
+<head><!--  Copyright (C) 2003, International Business Machines Corporation and
+  others. All Rights Reserved.
+
+  $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/stringprep/Attic/package.html,v $
+  $Revision: 1.1 $
+  $Date: 2003/08/27 03:09:08 $
+-->
+
+<title>com.ibm.icu.stringprep package overview</title>
+</head>
+
+<body bgcolor="white">
+
+<p>StringPrep and IDNA support.</p>
+</body>
+</html>