diff --git a/tools/unicodetools/com/ibm/text/UCA/CEList.java b/tools/unicodetools/com/ibm/text/UCA/CEList.java index 05c4650d67f..24f0073b3a2 100644 --- a/tools/unicodetools/com/ibm/text/UCA/CEList.java +++ b/tools/unicodetools/com/ibm/text/UCA/CEList.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/CEList.java,v $ +* $Date: 2001/08/31 00:20:40 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.UCA; import com.ibm.text.UCD.*; import com.ibm.text.utility.*; diff --git a/tools/unicodetools/com/ibm/text/UCA/Case.java b/tools/unicodetools/com/ibm/text/UCA/Case.java index a3f5bad605f..482a63b6b62 100644 --- a/tools/unicodetools/com/ibm/text/UCA/Case.java +++ b/tools/unicodetools/com/ibm/text/UCA/Case.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Case.java,v $ +* $Date: 2001/08/31 00:20:40 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.UCA; public final class Case { diff --git a/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java b/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java index ed634670be9..43d710715cf 100644 --- a/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java +++ b/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/GenOverlap.java,v $ +* $Date: 2001/08/31 00:20:40 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.UCA; import java.util.*; diff --git a/tools/unicodetools/com/ibm/text/UCA/RuleComparator.java b/tools/unicodetools/com/ibm/text/UCA/RuleComparator.java index a85baf86967..63968acbbdb 100644 --- a/tools/unicodetools/com/ibm/text/UCA/RuleComparator.java +++ b/tools/unicodetools/com/ibm/text/UCA/RuleComparator.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/RuleComparator.java,v $ +* $Date: 2001/08/31 00:20:40 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.UCA; import com.ibm.text.UCD.*; diff --git a/tools/unicodetools/com/ibm/text/UCA/UCA.java b/tools/unicodetools/com/ibm/text/UCA/UCA.java index 49817ffa245..b52e0ca7bb7 100644 --- a/tools/unicodetools/com/ibm/text/UCA/UCA.java +++ b/tools/unicodetools/com/ibm/text/UCA/UCA.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $ +* $Date: 2001/08/31 00:20:40 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.UCA; import java.util.*; diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java index dd6f28a8c5f..7e1048e57e6 100644 --- a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java +++ b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $ +* $Date: 2001/08/31 00:20:39 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.UCA; import java.util.*; diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java b/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java index 9234c8d6cab..d7c040f32b1 100644 --- a/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java +++ b/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java,v $ +* $Date: 2001/08/31 00:20:39 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.UCA; import java.util.*; diff --git a/tools/unicodetools/com/ibm/text/utility/ChainException.java b/tools/unicodetools/com/ibm/text/utility/ChainException.java index 2d0a760ef01..73713e48aca 100644 --- a/tools/unicodetools/com/ibm/text/utility/ChainException.java +++ b/tools/unicodetools/com/ibm/text/utility/ChainException.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/ChainException.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; diff --git a/tools/unicodetools/com/ibm/text/utility/CompactByteArray.java b/tools/unicodetools/com/ibm/text/utility/CompactByteArray.java index 8b3f555ae49..98ad3ce60a2 100644 --- a/tools/unicodetools/com/ibm/text/utility/CompactByteArray.java +++ b/tools/unicodetools/com/ibm/text/utility/CompactByteArray.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/CompactByteArray.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; /* diff --git a/tools/unicodetools/com/ibm/text/utility/CompactShortArray.java b/tools/unicodetools/com/ibm/text/utility/CompactShortArray.java index 6b2886042ae..091a2e0da16 100644 --- a/tools/unicodetools/com/ibm/text/utility/CompactShortArray.java +++ b/tools/unicodetools/com/ibm/text/utility/CompactShortArray.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/CompactShortArray.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; diff --git a/tools/unicodetools/com/ibm/text/utility/Counter.java b/tools/unicodetools/com/ibm/text/utility/Counter.java index 5719dc8c8e2..bf404a6de8b 100644 --- a/tools/unicodetools/com/ibm/text/utility/Counter.java +++ b/tools/unicodetools/com/ibm/text/utility/Counter.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Counter.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; diff --git a/tools/unicodetools/com/ibm/text/utility/Differ.java b/tools/unicodetools/com/ibm/text/utility/Differ.java index eb6d81aec2f..f806b45d8f1 100644 --- a/tools/unicodetools/com/ibm/text/utility/Differ.java +++ b/tools/unicodetools/com/ibm/text/utility/Differ.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Differ.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; /** Basic Diff program. Compares two sequences of objects fed into it, and diff --git a/tools/unicodetools/com/ibm/text/utility/DifferTest.java b/tools/unicodetools/com/ibm/text/utility/DifferTest.java index e382d6b8bf5..28e74473862 100644 --- a/tools/unicodetools/com/ibm/text/utility/DifferTest.java +++ b/tools/unicodetools/com/ibm/text/utility/DifferTest.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/DifferTest.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; diff --git a/tools/unicodetools/com/ibm/text/utility/DualWriter.java b/tools/unicodetools/com/ibm/text/utility/DualWriter.java index 891dc417ef0..a88288e191f 100644 --- a/tools/unicodetools/com/ibm/text/utility/DualWriter.java +++ b/tools/unicodetools/com/ibm/text/utility/DualWriter.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/DualWriter.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; import java.awt.*; diff --git a/tools/unicodetools/com/ibm/text/utility/EquivalenceClass.java b/tools/unicodetools/com/ibm/text/utility/EquivalenceClass.java index b7f6fb0b8b3..aaf54bfc711 100644 --- a/tools/unicodetools/com/ibm/text/utility/EquivalenceClass.java +++ b/tools/unicodetools/com/ibm/text/utility/EquivalenceClass.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/EquivalenceClass.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; diff --git a/tools/unicodetools/com/ibm/text/utility/IndentWriter.java b/tools/unicodetools/com/ibm/text/utility/IndentWriter.java index 96246be465b..3b8ced45d28 100644 --- a/tools/unicodetools/com/ibm/text/utility/IndentWriter.java +++ b/tools/unicodetools/com/ibm/text/utility/IndentWriter.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/IndentWriter.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; import java.io.*; diff --git a/tools/unicodetools/com/ibm/text/utility/IntStack.java b/tools/unicodetools/com/ibm/text/utility/IntStack.java index b4de9d43904..94d14016971 100644 --- a/tools/unicodetools/com/ibm/text/utility/IntStack.java +++ b/tools/unicodetools/com/ibm/text/utility/IntStack.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/IntStack.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; // ============================================================= diff --git a/tools/unicodetools/com/ibm/text/utility/LengthFirstComparator.java b/tools/unicodetools/com/ibm/text/utility/LengthFirstComparator.java index 7fb889b2cff..49170f67ed7 100644 --- a/tools/unicodetools/com/ibm/text/utility/LengthFirstComparator.java +++ b/tools/unicodetools/com/ibm/text/utility/LengthFirstComparator.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/LengthFirstComparator.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; import java.util.*; diff --git a/tools/unicodetools/com/ibm/text/utility/Pair.java b/tools/unicodetools/com/ibm/text/utility/Pair.java index 63b28e89020..55fdf15ade3 100644 --- a/tools/unicodetools/com/ibm/text/utility/Pair.java +++ b/tools/unicodetools/com/ibm/text/utility/Pair.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Pair.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; public final class Pair implements java.lang.Comparable { diff --git a/tools/unicodetools/com/ibm/text/utility/UTF16Plus.java b/tools/unicodetools/com/ibm/text/utility/UTF16Plus.java index 9b4ce48c546..8385a2d0a68 100644 --- a/tools/unicodetools/com/ibm/text/utility/UTF16Plus.java +++ b/tools/unicodetools/com/ibm/text/utility/UTF16Plus.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/UTF16Plus.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; public class UTF16Plus { diff --git a/tools/unicodetools/com/ibm/text/utility/UTF32.java b/tools/unicodetools/com/ibm/text/utility/UTF32.java index d9bb69fbce7..5e34251930c 100644 --- a/tools/unicodetools/com/ibm/text/utility/UTF32.java +++ b/tools/unicodetools/com/ibm/text/utility/UTF32.java @@ -1,10 +1,23 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/UTF32.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; /** * Utility class for demonstrating UTF16 character conversions and indexing conversions. * Ideally, these methods would be on existing classes in Java, but they can also be used * in a stand-alone utility class like this one. -*
Code that uses strings alone rarely needs modification. +*
Code that uses strings alone rarely needs modification. * By design, UTF-16 does not allow overlap, so searching for strings is a safe operation. * Similarly, concatenation is always safe. Substringing is safe if the start and end are both * on UTF32 boundaries. In normal code, the values for start and end are on those boundaries, @@ -17,14 +30,14 @@ package com.ibm.text.utility; * They are used for iteration, filtering and copying. See the examples below. *
bounds32()
is useful for finding the nearest UTF-32 boundaries.
-* However, in most circumstances it is better to use
+* However, in most circumstances it is better to use
*
* BreakIterator.getCharacterInstance(Locale) to find character boundaries
* that are closer to end-user expectations.
* valueOf32()
is occasionally convenient for producing a string containing a UTF-32 value.
+* valueOf32()
is occasionally convenient for producing a string containing a UTF-32 value.
* findOffset16()
and findOffset32()
are generally not needed,
+* findOffset16()
and findOffset32()
are generally not needed,
* except when interfacing to specifications that use UTF-32 indices (such as XSL).
* isLegal()
can be used to test whether UTF-16 or UTF-32 values are valid.
@@ -32,11 +45,11 @@ package com.ibm.text.utility;
* isLeadSurrogate()
, isSurrogate()
, and isTrailSurrogate()
* test the type of a char. They are useful for lower-level code.
* getChar32()
, getLead()
, and getTrail()
+* getChar32()
, getLead()
, and getTrail()
* are sometimes useful for putting together and taking apart UTF-32 values.
* The following examples illustrate use of some of these methods. +*
The following examples illustrate use of some of these methods.
// iteration forwards: Original for (int i = 0; i < s.length(); ++i) { @@ -69,7 +82,7 @@ for (int i = s.length()-1; i > 0; i-=UTF32.count16(ch)) { *
Lead
and Trail
in the API,
* which gives a better sense of their ordering in a string. offset16
and offset32
are used to distinguish
-* offsets to UTF-16 boundaries vs offsets to UTF-32 boundaries.
+* offsets to UTF-16 boundaries vs offsets to UTF-32 boundaries.
* int char32
is used to contain UTF-32 characters, as opposed to char
, which is a UTF-16 code unit.
* bounds(string, offset16) != TRAIL
.
* isLegal()
can be used to check for validity if desired.
* char32
contains an out-of-bounds UTF-32 value,
+* Out-of-bounds UTF-32 values: If a char32
contains an out-of-bounds UTF-32 value,
* then it is treated as REPLACEMENT_CHAR for consistency across the API.
* bounds32()
.
*/
@@ -118,21 +131,21 @@ public final class UTF32 {
* If a validity check is required, use isLegal()
* on char32 before calling.
* If this were integrated into the Java API, it could be a static method of either Character or String.
- * @return 2 if char32 is a supplementary code point (it needs a surrogate pair), otherwise 1.
+ * @return 2 if char32 is a supplementary code point (it needs a surrogate pair), otherwise 1.
 * @param char32 the input character.
*/
public static int count16(int char32) {
if (char32 < MIN_SUPPLEMENTARY) return 1;
return 2;
}
-
+
/**
* Extract a single UTF-32 value from a string.
* Used when iterating forwards or backwards (with count16()
), as well as random access.
* If a validity check is required, use isLegal()
on the return value.
*
If this were integrated into the Java API, it could be a method of String, StringBuffer and possibly CharacterIterator.
 * @return the UTF-32 value of the code point that contains the char at offset16.
- * The boundaries of that codepoint are the same as in bounds32()
.
+ * The boundaries of that codepoint are the same as in bounds32()
.
* @param source array of UTF-16 chars
* @param offset16 UTF-16 offset to the start of the character.
*/
@@ -141,11 +154,11 @@ public final class UTF32 {
if (!isSurrogate(single)) return single;
try { // use exception to catch out-of-bounds
-
+
// Convert the UTF-16 surrogate pair if necessary.
// For simplicity in usage, and because the frequency of pairs is low,
// look both directions.
-
+
if (isLeadSurrogate(single)) {
char trail = source.charAt(++offset16);
if (isTrailSurrogate(trail)) {
@@ -166,11 +179,11 @@ public final class UTF32 {
if (!isSurrogate(single)) return single;
try { // use exception to catch out-of-bounds
-
+
// Convert the UTF-16 surrogate pair if necessary.
// For simplicity in usage, and because the frequency of pairs is low,
// look both directions.
-
+
if (isLeadSurrogate(single)) {
char trail = source.charAt(++offset16);
if (isTrailSurrogate(trail)) {
@@ -185,21 +198,21 @@ public final class UTF32 {
} catch (StringIndexOutOfBoundsException e) {}
return single; // return unmatched surrogate
}
-
+
public static int char32At(char[] source, int start16, int end16, int offset16) {
if (offset16 < start16 || offset16 >= end16) {
throw new ArrayIndexOutOfBoundsException(offset16);
}
-
+
char single = source[offset16];
if (!isSurrogate(single)) return single;
try { // use exception to catch out-of-bounds
-
+
// Convert the UTF-16 surrogate pair if necessary.
// For simplicity in usage, and because the frequency of pairs is low,
// look both directions.
-
+
if (isLeadSurrogate(single)) {
++offset16;
if (offset16 >= end16) return single;
@@ -216,8 +229,8 @@ public final class UTF32 {
} catch (ArrayIndexOutOfBoundsException e) {}
return single; // return unmatched surrogate
}
-
-
+
+
// moral equivalent of valueOf32(charAt32(x)), but no memory alloc
public static String getCodePointSubstring(String s, int offset16) {
switch(bounds32(s,offset16)) {
@@ -275,16 +288,16 @@ public final class UTF32 {
// mismatch, just use long form
b.replace(position, end+1, valueOf32(codePoint));
}
-
+
/**
* See if a char value is legal. It can't be:
*
If this were integrated into the Java API, it could be a static method of String or Character. * @param char16 UTF-16 code unit to test - * @return true iff legal. + * @return true iff legal. */ public static boolean isLegal(char char16) { return (char16 < 0xFFFE); @@ -300,7 +313,7 @@ public final class UTF32 { * Note: legal does not mean that it is assigned in this version of Unicode. *
If this were integrated into the Java API, it could be a static method of String or Character. * @param char32 UTF-32 value to test - * @return true iff legal. + * @return true iff legal. */ public static boolean isLegal(int char32) { if (char32 < 0) return false; @@ -319,7 +332,7 @@ public final class UTF32 { public static boolean isSurrogate(int char32) { return (SURROGATE_BASE <= char32 && char32 < SURROGATE_LIMIT); } - + /** * Determines whether the code point is a supplementary. *
If this were integrated into the Java API, it could be a static method of String or Character. @@ -329,7 +342,7 @@ public final class UTF32 { public static boolean isSupplementary(int char32) { return (char32 >= MIN_SUPPLEMENTARY && char32 <= MAX_UNICODE); } - + /** * Determines whether the code point is basic, i.e. non-negative and below the supplementary range. *
If this were integrated into the Java API, it could be a static method of String or Character. @@ -339,7 +352,7 @@ public final class UTF32 { public static boolean isBasic(int char32) { return (char32 >= 0 && char32 < MIN_SUPPLEMENTARY); } - + /** * Determines whether the character is a trail surrogate. *
If this were integrated into the Java API, it could be a static method of String or Character. @@ -349,7 +362,7 @@ public final class UTF32 { public static boolean isTrailSurrogate(char ch) { return (TRAIL_BASE <= ch && ch < TRAIL_LIMIT); } - + /** * Determines whether the character is a lead surrogate. *
If this were integrated into the Java API, it could be a static method of String or Character.
@@ -359,7 +372,7 @@ public final class UTF32 {
public static boolean isLeadSurrogate(char ch) {
return (LEAD_BASE <= ch && ch < LEAD_LIMIT);
}
-
+
/**
* Returns the lead surrogate.
* If a validity check is required, use isLegal()
on char32 before calling.
@@ -374,7 +387,7 @@ public final class UTF32 {
}
return (char)char32;
}
-
+
/**
* Returns the trail surrogate.
* If a validity check is required, use isLegal()
on char32 before calling.
@@ -385,11 +398,11 @@ public final class UTF32 {
*/
public static char getTrail(int char32) {
if (char32 >= MIN_SUPPLEMENTARY) {
- return (char)(TRAIL_BASE + (char32 & TRAIL_MASK));
+ return (char)(TRAIL_BASE + (char32 & TRAIL_MASK));
}
return '\u0000';
}
-
+
/**
* Convenience method corresponding to String.valueOf(char). It returns a one or two char string containing
* the UTF-32 value. If the input value can't be converted, it substitutes REPLACEMENT_CHAR.
@@ -408,10 +421,10 @@ public final class UTF32 {
}
}
private static char[] buf2 = new char[2]; // used to avoid allocations
-
+
/**
* Returns the UTF-32 character corresponding to the two chars.
- * If a validity check is required, check the arguments with
+ * If a validity check is required, check the arguments with
* isLeadSurrogate()
and isTrailSurrogate()
, respectively before calling.
*
If this were integrated into the Java API, it could be a static method of String or Character. * @return the UTF-32 character, or REPLACEMENT_CHAR if invalid. @@ -424,7 +437,7 @@ public final class UTF32 { } return REPLACEMENT_CHAR; } - + /** * Returns the type of the UTF32 boundaries around the char at offset16. * Used for random access. @@ -470,7 +483,7 @@ public final class UTF32 { } return SINGLE; } - + // should be renamed bounds public static int bounds32(char[] source, int oStart, int oEnd, int offset16) { @@ -493,7 +506,7 @@ public final class UTF32 { /** - * Returns the UTF-16 offset that corresponds to a UTF-32 offset. + * Returns the UTF-16 offset that corresponds to a UTF-32 offset. * Used for random access. See the class description * for notes on roundtripping. *
If this were integrated into the Java API, it could be a method of String, StringBuffer and possibly CharacterIterator. @@ -506,7 +519,7 @@ public final class UTF32 { int remaining = offset32; // for decrementing boolean hadLeadSurrogate = false; int i; - + for (i = 0; remaining > 0 && i < source.length(); ++i) { char ch = source.charAt(i); if (hadLeadSurrogate && isTrailSurrogate(ch)) { @@ -516,15 +529,15 @@ public final class UTF32 { --remaining; // count others as 1 } } - + // if we didn't use up all of remaining (or if we started < 0) // then it is beyond the bounds - + if (remaining != 0) throw new StringIndexOutOfBoundsException(offset32); - + // special check for last surrogate if needed, for consistency with // other situations - + if (hadLeadSurrogate && i < source.length() && isTrailSurrogate(source.charAt(i))) { ++i; // grab extra unicode } @@ -574,13 +587,13 @@ public final class UTF32 { * @param target string to add to */ public static void append32(StringBuffer target, int char32) { - + // Check for irregular values - + if (char32 < 0 || char32 > MAX_UNICODE) char32 = REPLACEMENT_CHAR; - + // Write the UTF-16 values - + if (char32 >= MIN_SUPPLEMENTARY) { target.append((char)(LEAD_BASE_OFFSET + (char32 >> SURROGATE_SHIFT))); target.append((char)(TRAIL_BASE + (char32 & TRAIL_MASK))); @@ -588,7 +601,7 @@ public final class UTF32 { target.append((char)char32); } } - + /** * Compare strings using Unicode code point order, instead of UTF-16 code unit order. */ @@ -615,7 +628,7 @@ public final class UTF32 { char ca = sa.charAt(i); char cb = sb.charAt(i); if (ca == cb) continue; // skip remap if equal - + // start of only different section if (ca >= 0xD800) { // reshuffle to get right codepoint order ca += (ca < 0xE000) ? 0x2000 : -0x800; @@ -624,7 +637,7 @@ public final class UTF32 { cb += (cb < 0xE000) ? 
0x2000 : -0x800; } // end of only different section - + if (ca < cb) return -1; return 1; // wasn't equal, so return 1 } @@ -633,75 +646,75 @@ public final class UTF32 { return 0; } } - + // =========================================================== // PRIVATES // =========================================================== - + /** * Prevent instance from being created. */ private UTF32() {} - + /** * Maximum code point values for UTF-32. */ private static final int MAX_UNICODE = 0x10FFFF; - + /** * Maximum values for Basic code points (BMP). */ private static final int MAX_BASIC = 0xFFFF; - + /** * Minimum value for Supplementary code points (SMP). */ private static final int MIN_SUPPLEMENTARY = 0x10000; - + /** * Used to mask off single plane in checking for NON_CHARACTER */ private static final int PLANE_MASK = 0xFFFF; - + /** * Range of non-characters in each plane */ - private static final int - NON_CHARACTER_BASE = 0xFFFE, + private static final int + NON_CHARACTER_BASE = 0xFFFE, NON_CHARACTER_END = 0xFFFF; // useful statics and tables for fast lookup - + /** * Values for surrogate detection. X is a surrogate iff X & SURROGATE_MASK == SURROGATE_MASK. */ static final int SURROGATE_MASK = 0xD800; - + /** * Bottom 10 bits for use in surrogates. */ private static final int TRAIL_MASK = 0x3FF; - + /** * Shift value for surrogates. */ private static final int SURROGATE_SHIFT = 10; - - /** + + /** * Lead surrogates go from LEAD_BASE up to LEAD_LIMIT-1. */ private static final int LEAD_BASE = 0xD800, LEAD_LIMIT = 0xDC00; - - /** + + /** * Trail surrogates go from TRAIL_BASE up to TRAIL_LIMIT-1. */ private static final int TRAIL_BASE = 0xDC00, TRAIL_LIMIT = 0xE000; - - /** + + /** * Surrogates go from SURROGATE_BASE up to SURROGATE_LIMIT-1. */ private static final int SURROGATE_BASE = 0xD800, SURROGATE_LIMIT = 0xE000; - + /** * Any codepoint at or greater than SURROGATE_SPACE_BASE requires 2 16-bit code units. 
*/ @@ -712,7 +725,7 @@ public final class UTF32 { */ private static final int SURROGATE_OFFSET = MIN_SUPPLEMENTARY - (LEAD_BASE << SURROGATE_SHIFT) - TRAIL_BASE; - + private static final int LEAD_BASE_OFFSET = LEAD_BASE - (MIN_SUPPLEMENTARY >> SURROGATE_SHIFT); - + }; diff --git a/tools/unicodetools/com/ibm/text/utility/UTF8StreamReader.java b/tools/unicodetools/com/ibm/text/utility/UTF8StreamReader.java index da165e5d1dc..1a5bee1330d 100644 --- a/tools/unicodetools/com/ibm/text/utility/UTF8StreamReader.java +++ b/tools/unicodetools/com/ibm/text/utility/UTF8StreamReader.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/UTF8StreamReader.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; import java.io.Reader; diff --git a/tools/unicodetools/com/ibm/text/utility/UTF8StreamWriter.java b/tools/unicodetools/com/ibm/text/utility/UTF8StreamWriter.java index 5b25d47ac26..41e7687adb4 100644 --- a/tools/unicodetools/com/ibm/text/utility/UTF8StreamWriter.java +++ b/tools/unicodetools/com/ibm/text/utility/UTF8StreamWriter.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/UTF8StreamWriter.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; import java.io.*; diff --git a/tools/unicodetools/com/ibm/text/utility/Utility.java b/tools/unicodetools/com/ibm/text/utility/Utility.java index fae022183bb..66356029e03 100644 --- a/tools/unicodetools/com/ibm/text/utility/Utility.java +++ b/tools/unicodetools/com/ibm/text/utility/Utility.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; import java.util.*; @@ -173,7 +186,7 @@ public final class Utility { // COMMON UTILITIES if (p.length() != 0) den = Integer.parseInt(p); return num/den; } - + public static int codePointFromHex(String p) { String temp = Utility.fromHex(p); if (UTF32.length32(temp) != 1) throw new ChainException("String is not single (UTF32) character: " + p, null); diff --git a/tools/unicodetools/com/ibm/text/utility/XMLParse.java b/tools/unicodetools/com/ibm/text/utility/XMLParse.java index a3b16f423ac..7be87ecb820 100644 --- a/tools/unicodetools/com/ibm/text/utility/XMLParse.java +++ b/tools/unicodetools/com/ibm/text/utility/XMLParse.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and 
* +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/XMLParse.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; /** diff --git a/tools/unicodetools/com/ibm/text/utility/XMLParseTypes.java b/tools/unicodetools/com/ibm/text/utility/XMLParseTypes.java index 9ed7a876db3..9db3b4fb9e8 100644 --- a/tools/unicodetools/com/ibm/text/utility/XMLParseTypes.java +++ b/tools/unicodetools/com/ibm/text/utility/XMLParseTypes.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/XMLParseTypes.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; /** Interface of values for use with XMLParse. diff --git a/tools/unicodetools/com/ibm/text/utility/testParser.java b/tools/unicodetools/com/ibm/text/utility/testParser.java index 8b196403a46..e9d332e35a0 100644 --- a/tools/unicodetools/com/ibm/text/utility/testParser.java +++ b/tools/unicodetools/com/ibm/text/utility/testParser.java @@ -1,3 +1,16 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/testParser.java,v $ +* $Date: 2001/08/31 00:19:16 $ +* $Revision: 1.2 $ +* +******************************************************************************* +*/ + package com.ibm.text.utility; /** Simple Test program for XMLParse