diff --git a/icu4j/src/com/ibm/icu/dev/test/TestAll.java b/icu4j/src/com/ibm/icu/dev/test/TestAll.java
index 375a597f547..ee50a014d17 100755
--- a/icu4j/src/com/ibm/icu/dev/test/TestAll.java
+++ b/icu4j/src/com/ibm/icu/dev/test/TestAll.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/TestAll.java,v $
- * $Date: 2002/04/03 04:31:59 $
- * $Revision: 1.25 $
+ * $Date: 2002/06/14 19:08:57 $
+ * $Revision: 1.26 $
*
*****************************************************************************************
*/
@@ -126,6 +126,12 @@ public class TestAll extends TestFmwk {
});
}
+ public void TestUScriptRun() throws Exception {
+ run( new TestFmwk[] {
+ new com.ibm.icu.dev.test.lang.TestUScriptRun(),
+ });
+ }
+
public void TestNumberFormat() throws Exception {
run(new TestFmwk[] {
new com.ibm.icu.dev.test.format.IntlTestNumberFormat(),
diff --git a/icu4j/src/com/ibm/icu/dev/test/lang/TestUScriptRun.java b/icu4j/src/com/ibm/icu/dev/test/lang/TestUScriptRun.java
new file mode 100644
index 00000000000..1f963a5f185
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/dev/test/lang/TestUScriptRun.java
@@ -0,0 +1,272 @@
+/**
+*******************************************************************************
+* Copyright (C) 1999-2002, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*/
+
+package com.ibm.icu.dev.test.lang;
+
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.lang.UScriptRun;
+import com.ibm.icu.dev.test.TestFmwk;
+
+public class TestUScriptRun extends TestFmwk
+{
+ /**
+  * Creates the test object; this class keeps no per-instance state.
+  */
+ public TestUScriptRun() {
+     super();
+ }
+
+ public static void main(String[] args) throws Exception {
+ new TestUScriptRun().run(args);
+ }
+
+ /**
+  * One expected script run: a piece of text paired with the script code
+  * the iterator is expected to report for it. Instances are immutable.
+  */
+ private static final class RunTestData
+ {
+     /** The characters that make up the run. */
+     final String runText;
+
+     /** The script code expected for the run. */
+     final int runScript;
+
+     /**
+      * @param theText the characters that make up the run
+      * @param theScriptCode the script code expected for the run
+      */
+     public RunTestData(String theText, int theScriptCode)
+     {
+         runText = theText;
+         runScript = theScriptCode;
+     }
+ }
+
+ /*
+  * The expected runs, in text order. TestRuns() concatenates the runText
+  * values into one string and expects the iterator to report exactly one
+  * run per entry, with the script given here. Spaces and the parentheses
+  * are placed inside the entry whose script they are expected to take on,
+  * and the last entry is made up of surrogate pairs.
+  */
+ private static final RunTestData[] testData = {
+ new RunTestData("\u0020\u0946\u0939\u093F\u0928\u094D\u0926\u0940\u0020", UScript.DEVANAGARI),
+ new RunTestData("\u0627\u0644\u0639\u0631\u0628\u064A\u0629\u0020", UScript.ARABIC),
+ new RunTestData("\u0420\u0443\u0441\u0441\u043A\u0438\u0439\u0020", UScript.CYRILLIC),
+ new RunTestData("English (", UScript.LATIN),
+ new RunTestData("\u0E44\u0E17\u0E22", UScript.THAI),
+ new RunTestData(") ", UScript.LATIN),
+ new RunTestData("\u6F22\u5B75", UScript.HAN),
+ new RunTestData("\u3068\u3072\u3089\u304C\u306A\u3068", UScript.HIRAGANA),
+ new RunTestData("\u30AB\u30BF\u30AB\u30CA", UScript.KATAKANA),
+ new RunTestData("\uD801\uDC00\uD801\uDC01\uD801\uDC02\uD801\uDC03", UScript.DESERET)
+ };
+
+ /**
+  * Steps through the runs reported by scriptRun and compares each one with
+  * the expected offsets and script code, logging an error for every
+  * mismatch. Iteration stops once testData.length runs have been seen.
+  *
+  * @param scriptRun the iterator to check
+  * @param runStarts the expected start offset of each run, followed by the
+  *                  limit of the last run
+  * @param testData the expected runs; only runScript is read here
+  */
+ private void CheckScriptRuns(UScriptRun scriptRun, int[] runStarts, RunTestData[] testData)
+ {
+     int seen = 0;
+     boolean haveRun = scriptRun.next();
+
+     while (haveRun) {
+         int actualStart = scriptRun.getScriptStart();
+         int actualLimit = scriptRun.getScriptLimit();
+         int actualScript = scriptRun.getScriptCode();
+
+         if (actualStart != runStarts[seen]) {
+             errln("Incorrect start offset for run " + seen + ": expected " + runStarts[seen] + ", got " + actualStart);
+         }
+
+         if (actualLimit != runStarts[seen + 1]) {
+             errln("Incorrect limit offset for run " + seen + ": expected " + runStarts[seen + 1] + ", got " + actualLimit);
+         }
+
+         if (actualScript != testData[seen].runScript) {
+             errln("Incorrect script for run " + seen + ": expected \"" + UScript.getName(testData[seen].runScript) + "\", got \"" + UScript.getName(actualScript) + "\"");
+         }
+
+         seen += 1;
+
+         // Ask for another run only while more runs are expected.
+         haveRun = seen < testData.length && scriptRun.next();
+     }
+
+     // Complain if we didn't see the number of runs we expected.
+     if (seen != testData.length) {
+         errln("Incorrect number of runs: expected " + testData.length + ", got " + seen);
+     }
+ }
+
+ /**
+  * Checks that the three-argument constructor throws
+  * IllegalArgumentException for ranges that do not fit the array, both for
+  * a null array and for a five-character array.
+  */
+ public void TestContstruction()
+ {
+     char[] dummy = {'d', 'u', 'm', 'm', 'y'};
+
+     expectBadConstruction(null, "null", 0, 100);
+     expectBadConstruction(null, "null", 100, 0);
+     expectBadConstruction(null, "null", 0, -100);
+     expectBadConstruction(null, "null", -100, 0);
+
+     expectBadConstruction(dummy, "dummy", 0, 6);
+     expectBadConstruction(dummy, "dummy", 6, 0);
+     expectBadConstruction(dummy, "dummy", 0, -100);
+     expectBadConstruction(dummy, "dummy", -100, 0);
+ }
+
+ /**
+  * Logs an error unless new UScriptRun(chars, start, count) throws
+  * IllegalArgumentException.
+  *
+  * @param chars the array to pass to the constructor
+  * @param arrayName how the array is spelled in the error message
+  * @param start the start index to pass to the constructor
+  * @param count the character count to pass to the constructor
+  */
+ private void expectBadConstruction(char[] chars, String arrayName, int start, int count)
+ {
+     try {
+         new UScriptRun(chars, start, count);
+         errln("new UScriptRun(" + arrayName + ", " + start + ", " + count + ") did not produce an IllegalArgumentException!");
+     } catch (IllegalArgumentException expected) {
+         // the exception is the outcome this test wants
+     }
+ }
+
+ /**
+  * Checks argument validation in the reset methods: out-of-range requests
+  * must throw IllegalArgumentException, first on an empty iterator, then
+  * when a five-character array is supplied, and finally against that array
+  * once it has been installed by a successful reset.
+  */
+ public void TestReset()
+ {
+     UScriptRun scriptRun = null;
+     char[] dummy = {'d', 'u', 'm', 'm', 'y'};
+
+     try {
+         scriptRun = new UScriptRun();
+     } catch (IllegalArgumentException iae) {
+         errln("new UScriptRun() produced an IllegalArgumentException!");
+     }
+
+     // range-only resets on the empty iterator
+     expectBadRangeReset(scriptRun, 0, 100);
+     expectBadRangeReset(scriptRun, 100, 0);
+     expectBadRangeReset(scriptRun, 0, -100);
+     expectBadRangeReset(scriptRun, -100, 0);
+
+     // resets that supply the array together with a bad range
+     expectBadArrayReset(scriptRun, dummy, 0, 6);
+     expectBadArrayReset(scriptRun, dummy, 6, 0);
+     expectBadArrayReset(scriptRun, dummy, 0, -100);
+     expectBadArrayReset(scriptRun, dummy, -100, 0);
+
+     // the whole array is a legal range
+     try {
+         scriptRun.reset(dummy, 0, dummy.length);
+     } catch (IllegalArgumentException iae) {
+         errln("scriptRun.reset(dummy, 0, dummy.length) produced an IllegalArgumentException!");
+     }
+
+     // range-only resets are now checked against the five-character array
+     expectBadRangeReset(scriptRun, 0, 6);
+     expectBadRangeReset(scriptRun, 6, 0);
+ }
+
+ /**
+  * Logs an error unless scriptRun.reset(start, count) throws
+  * IllegalArgumentException.
+  */
+ private void expectBadRangeReset(UScriptRun scriptRun, int start, int count)
+ {
+     try {
+         scriptRun.reset(start, count);
+         errln("scriptRun.reset(" + start + ", " + count + ") did not produce an IllegalArgumentException!");
+     } catch (IllegalArgumentException expected) {
+         // the exception is the outcome this test wants
+     }
+ }
+
+ /**
+  * Logs an error unless scriptRun.reset(dummy, start, count) throws
+  * IllegalArgumentException.
+  */
+ private void expectBadArrayReset(UScriptRun scriptRun, char[] dummy, int start, int count)
+ {
+     try {
+         scriptRun.reset(dummy, start, count);
+         errln("scriptRun.reset(dummy, " + start + ", " + count + ") did not produce an IllegalArgumentException!");
+     } catch (IllegalArgumentException expected) {
+         // the exception is the outcome this test wants
+     }
+ }
+
+ /**
+  * Builds one string from all of the testData entries and checks the runs
+  * reported for it by a newly constructed iterator, by the same iterator
+  * after reset(), and by an empty iterator that is reset onto the text.
+  * Also checks that next() returns false on an empty iterator.
+  */
+ public void TestRuns()
+ {
+     int[] runStarts = new int[testData.length + 1];
+     StringBuffer text = new StringBuffer();
+     UScriptRun scriptRun = null;
+
+     // Each run starts where the text assembled so far ends.
+     for (int i = 0; i < testData.length; i++) {
+         runStarts[i] = text.length();
+         text.append(testData[i].runText);
+     }
+
+     // The extra slot holds the limit of the last run.
+     runStarts[testData.length] = text.length();
+
+     String testString = text.toString();
+     char[] testChars = testString.toCharArray();
+
+     try {
+         scriptRun = new UScriptRun(testChars);
+         CheckScriptRuns(scriptRun, runStarts, testData);
+     } catch (IllegalArgumentException iae) {
+         errln("new UScriptRun(testString.toCharArray()) produced an IllegalArgumentException!");
+     }
+
+     try {
+         scriptRun.reset();
+         CheckScriptRuns(scriptRun, runStarts, testData);
+     } catch (IllegalArgumentException iae) {
+         errln("scriptRun.reset() on a valid UScriptRun produced an IllegalArgumentException!");
+     }
+
+     try {
+         scriptRun = new UScriptRun();
+
+         boolean foundRun = scriptRun.next();
+
+         if (foundRun) {
+             errln("scriptRun.next() on an empty UScriptRun returned true!");
+         }
+     } catch (IllegalArgumentException iae) {
+         errln("new UScriptRun() produced an IllegalArgumentException!");
+     }
+
+     try {
+         scriptRun.reset(testChars, 0, testString.length());
+         CheckScriptRuns(scriptRun, runStarts, testData);
+     } catch (IllegalArgumentException iae) {
+         errln("scriptRun.reset(testString.toCharArray(), 0, testString.length) produced an IllegalArgumentException!");
+     }
+ }
+}
diff --git a/icu4j/src/com/ibm/icu/lang/UScriptRun.java b/icu4j/src/com/ibm/icu/lang/UScriptRun.java
new file mode 100644
index 00000000000..d5b2d16d1b4
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/lang/UScriptRun.java
@@ -0,0 +1,403 @@
+/*
+ *******************************************************************************
+ *
+ * Copyright (C) 1999-2002, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.lang;
+
+/**
+ * <code>UScriptRun</code> is used to find runs of characters in
+ * the same script. It implements a simple iterator over an array
+ * of characters. The iterator will resolve script-neutral characters
+ * like punctuation into the script of the surrounding characters.
+ * <p>
+ * The iterator will try to match paired punctuation. If it sees an
+ * opening punctuation character, it will remember the script that
+ * was assigned to that character, and assign the same script to the
+ * matching closing punctuation.
+ * <p>
+ * Scripts are chosen based on the <code>UScript</code> class.
+ * No attempt is made to combine related scripts into a single run. In
+ * particular, Hiragana, Katakana, and Han characters will appear in separate
+ * runs.
+ * <p>
+ * Here is an example of how to iterate over script runs:
+ * <pre>
+ * void printScriptRuns(char[] text)
+ * {
+ *     UScriptRun scriptRun = new UScriptRun(text);
+ *
+ *     while (scriptRun.next()) {
+ *         int start  = scriptRun.getScriptStart();
+ *         int limit  = scriptRun.getScriptLimit();
+ *         int script = scriptRun.getScriptCode();
+ *
+ *         System.out.println("Script \"" + UScript.getName(script) + "\" from " +
+ *                            start + " to " + limit + ".");
+ *     }
+ * }
+ * </pre>
+ */
+public final class UScriptRun
+{
+    /**
+     * Puts a copyright in the .class file
+     */
+    private static final String copyrightNotice
+        = "Copyright \u00a91999-2002 IBM Corp. All rights reserved.";
+
+    /**
+     * Construct an empty <code>UScriptRun</code> object. The <code>next()</code>
+     * method will return <code>false</code> the first time it is called.
+     */
+    public UScriptRun()
+    {
+        reset(null, 0, 0);
+    }
+
+ /**
+  * Construct a <code>UScriptRun</code> object which iterates over the given
+  * characters.
+  *
+  * @param chars the array of characters over which to iterate. A
+  *              <code>null</code> array is treated as empty text, the same
+  *              state the no-argument constructor sets up.
+  */
+ public UScriptRun(char[] chars)
+ {
+     // Guard the length lookup: a null array previously failed here with a
+     // NullPointerException, although reset(null, 0, 0) is a legal state.
+     reset(chars, 0, chars == null ? 0 : chars.length);
+ }
+
+ /**
+  * Construct a <code>UScriptRun</code> object which iterates over a subrange
+  * of the given characters.
+  *
+  * @param chars the array of characters over which to iterate.
+  * @param start the index of the first character over which to iterate
+  * @param count the number of characters over which to iterate
+  * @exception IllegalArgumentException if the range does not lie within
+  *            <code>chars</code>
+  */
+ public UScriptRun(char[] chars, int start, int count)
+ {
+     this.reset(chars, start, count);
+ }
+
+
+ /**
+  * Reset the iterator to the start of the text. Any paired punctuation
+  * remembered from an earlier pass over the text is forgotten.
+  */
+ public final void reset() {
+     scriptStart = charStart;
+     scriptLimit = charStart;
+     scriptCode = UScript.COMMON;
+
+     // Empty the paired-punctuation stack. Every constructor ends up here,
+     // so this also replaces the default value of 0, which made next() treat
+     // the never-filled slot 0 as the top entry and fail with a
+     // NullPointerException on text that starts with a closing character.
+     parenSP = -1;
+ }
+
+ /**
+  * Reset the iterator to iterate over the given range of the text. Throws
+  * IllegalArgumentException if the range is outside of the bounds of the
+  * character array. When no character array has been set, the text is
+  * taken to have length zero.
+  *
+  * @param start the index of the new first character over which to iterate
+  * @param count the new number of characters over which to iterate.
+  * @exception IllegalArgumentException if <code>start</code> or
+  *            <code>count</code> is negative, or the range extends past
+  *            the end of the character array
+  */
+ public final void reset(int start, int count)
+     throws IllegalArgumentException
+ {
+     final int textLength = (charArray == null) ? 0 : charArray.length;
+
+     // Written as a subtraction so that start + count cannot overflow.
+     final boolean rangeFits = start >= 0 && count >= 0 && start <= textLength - count;
+
+     if (!rangeFits) {
+         throw new IllegalArgumentException();
+     }
+
+     charStart = start;
+     charLimit = start + count;
+
+     reset();
+ }
+
+ /**
+ * Reset the iterator to iterate over count
characters
+ * in chars
starting at start
. This allows
+ * clients to reuse an iterator.
+ *
+ * @param chars the new array of characters over which to iterate.
+ * @param start the index of the first character over which to iterate.
+ * @param count the nuber of characters over which to iterate.
+ */
+ public final void reset(char[] chars, int start, int count)
+ {
+ charArray = chars;
+
+ reset(start, count);
+ }
+
+
+ /**
+  * Get the starting index of the current script run.
+  *
+  * @return the index of the first character in the current script run.
+  */
+ public final int getScriptStart()
+ {
+     return this.scriptStart;
+ }
+
+ /**
+  * Get the limit of the current script run, that is, the index of the
+  * first character after it.
+  *
+  * @return the index of the first character after the current script run.
+  */
+ public final int getScriptLimit()
+ {
+     return this.scriptLimit;
+ }
+
+ /**
+  * Get the script code assigned to the current script run.
+  *
+  * @return the script code for the script of the current script run.
+  * @see com.ibm.icu.lang.UScript
+  */
+ public final int getScriptCode()
+ {
+     return this.scriptCode;
+ }
+
+ /**
+  * Find the next script run. Returns <code>false</code> if there
+  * isn't another run, returns <code>true</code> if there is. After a
+  * <code>true</code> result the run is available from
+  * <code>getScriptStart()</code>, <code>getScriptLimit()</code> and
+  * <code>getScriptCode()</code>.
+  *
+  * @return <code>false</code> if there isn't another run, <code>true</code> if there is.
+  */
+ public final boolean next()
+ {
+     int startSP = parenSP; // used to find the first new open character
+
+     // if we've fallen off the end of the text, we're done
+     if (scriptLimit >= charLimit) {
+         return false;
+     }
+
+     scriptCode = UScript.COMMON;
+
+     for (scriptStart = scriptLimit; scriptLimit < charLimit; scriptLimit += 1) {
+         int high = charArray[scriptLimit];
+         int ch = high;
+
+         // if the character is a high surrogate and it's not the last one
+         // in the text, see if it's followed by a low surrogate
+         if (high >= 0xD800 && high <= 0xDBFF && scriptLimit < charLimit - 1)
+         {
+             int low = charArray[scriptLimit + 1];
+
+             // if it is followed by a low surrogate,
+             // consume it and form the full character
+             if (low >= 0xDC00 && low <= 0xDFFF) {
+                 ch = (high - 0xD800) * 0x0400 + low - 0xDC00 + 0x10000;
+                 scriptLimit += 1;
+             }
+         }
+
+         int sc = UScript.getScript(ch);
+         int pairIndex = getPairIndex(ch);
+
+         // Paired character handling:
+         //
+         // if it's an open character, push it onto the stack.
+         // if it's a close character, find the matching open on the
+         // stack, and use that script code. Any non-matching open
+         // characters above it on the stack will be popped.
+         if (pairIndex >= 0) {
+             if ((pairIndex & 1) == 0) {
+                 // Only push while there is room. An opener nested deeper
+                 // than the stack can hold is not remembered, instead of
+                 // failing with an ArrayIndexOutOfBoundsException.
+                 if (parenSP + 1 < parenStack.length) {
+                     parenStack[++parenSP] = new ParenStackEntry(pairIndex, scriptCode);
+                 }
+             } else if (parenSP >= 0) {
+                 int pi = pairIndex & ~1;
+
+                 // A slot that was never filled cannot match; skip it rather
+                 // than dereference null.
+                 while (parenSP >= 0
+                        && (parenStack[parenSP] == null || parenStack[parenSP].pairIndex != pi)) {
+                     parenSP -= 1;
+                 }
+
+                 if (parenSP < startSP) {
+                     startSP = parenSP;
+                 }
+
+                 if (parenSP >= 0) {
+                     sc = parenStack[parenSP].scriptCode;
+                 }
+             }
+         }
+
+         if (sameScript(scriptCode, sc)) {
+             if (scriptCode <= UScript.INHERITED && sc > UScript.INHERITED) {
+                 scriptCode = sc;
+
+                 // now that we have a final script code, fix any open
+                 // characters we pushed before we knew the script code.
+                 while (startSP < parenSP) {
+                     parenStack[++startSP].scriptCode = scriptCode;
+                 }
+             }
+
+             // if this character is a close paired character,
+             // pop it from the stack
+             if (pairIndex >= 0 && (pairIndex & 1) != 0 && parenSP >= 0) {
+                 parenSP -= 1;
+                 startSP -= 1;
+             }
+         } else {
+             // if the run broke on a surrogate pair,
+             // end it before the high surrogate
+             if (ch >= 0x10000) {
+                 scriptLimit -= 1;
+             }
+
+             break;
+         }
+     }
+
+     return true;
+ }
+
+ /**
+  * Compare two script codes to see if they are in the same script. A code
+  * that is not greater than <code>UScript.INHERITED</code> compares equal
+  * to every other code; otherwise the two codes must be identical.
+  *
+  * @param scriptOne one of the script codes.
+  * @param scriptTwo the other script code.
+  * @return <code>true</code> if the two scripts are the same.
+  * @see com.ibm.icu.lang.UScript
+  */
+ private static boolean sameScript(int scriptOne, int scriptTwo)
+ {
+     if (scriptOne <= UScript.INHERITED) {
+         return true;
+     }
+
+     if (scriptTwo <= UScript.INHERITED) {
+         return true;
+     }
+
+     return scriptOne == scriptTwo;
+ }
+
+ /*
+  * An entry on the paren stack: one opening paired character that has not
+  * been closed yet.
+  */
+ private static final class ParenStackEntry
+ {
+     // index in pairedChars of the opening character
+     int pairIndex;
+
+     // script code assigned to the opening character; next() may overwrite
+     // it once the script of the run is known
+     int scriptCode;
+
+     public ParenStackEntry(int thePairIndex, int theScriptCode)
+     {
+         this.pairIndex = thePairIndex;
+         this.scriptCode = theScriptCode;
+     }
+ }
+
+ // The text being iterated over and the range [charStart, charLimit) of it
+ // that is in use.
+ private int charStart;
+ private int charLimit;
+ private char charArray[];
+
+ // The run most recently found by next().
+ private int scriptStart;
+ private int scriptLimit;
+ private int scriptCode;
+
+ // Stack of opening paired characters that have not been closed yet. The
+ // array belongs to the instance: parenSP is per-instance, so a shared
+ // static array let one iterator overwrite entries another iterator's
+ // parenSP still referred to.
+ private final ParenStackEntry parenStack[] = new ParenStackEntry[128];
+
+ // Index of the top entry of parenStack, or -1 when the stack is empty.
+ private int parenSP = -1;
+
+ /**
+  * Find the highest bit that's set in a word. Uses a binary search through
+  * the bits: the value is tested against 2^16, 2^8, 2^4, 2^2 and 2^1 in
+  * turn and shifted down each time a test succeeds.
+  *
+  * @param n the word in which to find the highest bit that's set.
+  * @return the bit number (counting from the low order bit) of the highest bit,
+  *         or -32 if <code>n</code> is zero or negative.
+  */
+ private static final byte highBit(int n)
+ {
+     if (n <= 0) {
+         return -32;
+     }
+
+     byte position = 0;
+
+     // halve the width of the window each time round: 16, 8, 4, 2, 1
+     for (int width = 16; width >= 1; width >>= 1) {
+         if (n >= (1 << width)) {
+             n >>= width;
+             position += width;
+         }
+     }
+
+     return position;
+ }
+
+ /**
+  * Search the pairedChars array for the given character. The search starts
+  * at either index 0 or index pairedCharExtra and then probes with steps
+  * of pairedCharPower / 2, pairedCharPower / 4, ... down to 1.
+  *
+  * @param ch the character for which to search.
+  * @return the index of the character in the table, or -1 if it's not there.
+  */
+ private static int getPairIndex(int ch)
+ {
+     int index = (ch >= pairedChars[pairedCharExtra]) ? pairedCharExtra : 0;
+
+     for (int step = pairedCharPower >> 1; step >= 1; step >>= 1) {
+         if (ch >= pairedChars[index + step]) {
+             index += step;
+         }
+     }
+
+     return (pairedChars[index] == ch) ? index : -1;
+ }
+
+ // Table of paired punctuation. Each opening character sits at an even
+ // index with its closing character at the following odd index; next()
+ // uses the low bit of the index to tell the two apart. The values are in
+ // ascending order, which the search in getPairIndex() depends on.
+ private static int pairedChars[] = {
+ 0x0028, 0x0029, // ascii paired punctuation
+ 0x003c, 0x003e,
+ 0x005b, 0x005d,
+ 0x007b, 0x007d,
+ 0x00ab, 0x00bb, // guillemets
+ 0x2018, 0x2019, // general punctuation
+ 0x201c, 0x201d,
+ 0x2039, 0x203a,
+ 0x3008, 0x3009, // chinese paired punctuation
+ 0x300a, 0x300b,
+ 0x300c, 0x300d,
+ 0x300e, 0x300f,
+ 0x3010, 0x3011,
+ 0x3014, 0x3015,
+ 0x3016, 0x3017,
+ 0x3018, 0x3019,
+ 0x301a, 0x301b
+ };
+
+ // Search parameters for getPairIndex(): pairedCharPower is the largest
+ // power of two that is not greater than the table length, and
+ // pairedCharExtra is the number of entries beyond that power of two.
+ private static int pairedCharPower = 1 << highBit(pairedChars.length);
+ private static int pairedCharExtra = pairedChars.length - pairedCharPower;
+}
+