ICU-3652 make icu4j run under security manager

X-SVN-Rev: 14467
This commit is contained in:
Doug Felt 2004-02-06 21:54:06 +00:00
parent aa012dfd7c
commit 873f4f09fd
32 changed files with 940 additions and 675 deletions

View file

@ -6,8 +6,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/build.xml,v $
* $Date: 2004/01/30 19:17:51 $
* $Revision: 1.87 $
* $Date: 2004/02/06 21:54:06 $
* $Revision: 1.88 $
*
*******************************************************************************
* This is the ant build file for ICU4J. See readme.html for more information.
@ -86,6 +86,7 @@
<property name="jarDocs.file" value="icu4jdocs.jar"/>
<property name="icu4j.manifest" value="${src.dir}/com/ibm/icu/manifest.stub"/>
<property name="icu4j.module.manifest" value="${src.dir}/com/ibm/icu/manifest.module.stub"/>
<property name="icu4j.tests.manifest" value="${src.dir}/com/ibm/icu/dev/test/manifest.test.stub"/>
<property name="zip.file" value="../icu4j${DSTAMP}.zip"/>
<property name="zipSrc.file" value="../icu4jSrc${DSTAMP}.zip"/>
@ -239,11 +240,12 @@
manifest="${icu4j.manifest}"/>
</target>
<target name="testJar" depends="tests">
<jar jarfile="${testjar.file}"
compress="true"
includes="com/ibm/icu/dev/test/**/*"
basedir="${build.dir}"/>
<target name="testJar" depends="tests,jar">
<jar jarfile="${testjar.file}" compress="true" manifest="${icu4j.tests.manifest}">
<fileset dir="${build.dir}" includes="com/ibm/icu/dev/test/**/*"/>
<fileset dir="${build.dir}" includes="com/ibm/icu/dev/data/TestDataElements*.class"/>
<fileset dir="${src.dir}" includes="com/ibm/icu/dev/data/**/*/" excludes="**/*.java"/> <!-- too broad, but easy -->
</jar>
</target>
<target name="jarSrc" depends="init">
@ -310,24 +312,24 @@
<target name="check" depends="tests">
<java classname="com.ibm.icu.dev.test.TestAll" fork="yes">
<arg value="-w"/>
<classpath>
<pathelement path="${java.class.path}/"/>
<pathelement location="clover.jar"/>
<pathelement path="${build.dir}"/>
</classpath>
</classpath>
</java>
</target>
<target name="secureCheck" depends="tests">
<target name="secureCheck" depends="testJar">
<java classname="com.ibm.icu.dev.test.TestAll" fork="yes">
<!-- (use for debugging, LOTS of output) jvmarg value="-Djava.security.debug=access" -->
<jvmarg value="-Djava.security.manager"/>
<jvmarg value="-Djava.security.policy=src/com/ibm/icu/dev/test/security.policy"/>
<arg value="-w"/>
<jvmarg value="-Djava.security.policy=${src.dir}/com/ibm/icu/dev/test/security.policy"/>
<arg value="-w"/>
<classpath>
<pathelement path="${java.class.path}/"/>
<pathelement location="clover.jar"/>
<pathelement path="${build.dir}"/>
<pathelement location="${testjar.file}"/>
</classpath>
</java>
</target>

View file

@ -5,15 +5,16 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/demo/rbbi/BreakIteratorRules_en_US_DEMO.java,v $
* $Date: 2002/02/19 04:10:23 $
* $Revision: 1.8 $
* $Date: 2004/02/06 21:54:04 $
* $Revision: 1.9 $
*
*****************************************************************************************
*/
package com.ibm.icu.dev.demo.rbbi;
import java.util.ListResourceBundle;
import java.net.URL;
import com.ibm.icu.impl.ICUData;
/**
* This resource bundle is included for testing and demonstration purposes only.
@ -22,202 +23,209 @@ import java.net.URL;
* with good resource data (and a good dictionary file) for Thai
*/
public class BreakIteratorRules_en_US_DEMO extends ListResourceBundle {
private static final URL url =
BreakIteratorRules_en_US_DEMO.class.getResource("/com/ibm/data/misc/english.dict");
public Object[][] getContents() {
return contents;
}
private static final String DATA_NAME = "/com/ibm/data/misc/english.dict";
static final Object[][] contents = {
// names of classes to instantiate for the different kinds of break
// iterator. Notice we're now using DictionaryBasedBreakIterator
// for word and line breaking.
{ "BreakIteratorClasses",
new String[] { "RuleBasedBreakIterator", // character-break iterator class
"DictionaryBasedBreakIterator", // word-break iterator class
"DictionaryBasedBreakIterator", // line-break iterator class
"RuleBasedBreakIterator" } // sentence-break iterator class
},
// These are the same word-breaking rules as are specified in the default
// resource, except that the Latin letters, apostrophe, and hyphen are
// specified as dictionary characters
{ "WordBreakRules",
// ignore non-spacing marks, enclosing marks, and format characters,
// all of which should not influence the algorithm
"$_ignore_=[[:Mn:][:Me:][:Cf:]];"
public Object[][] getContents() {
final boolean exists = ICUData.exists(DATA_NAME);
// lower and upper case Roman letters, apostrophe and dash are
// in the English dictionary
+ "$_dictionary_=[a-zA-Z\\'\\-];"
if (!exists) {
return new Object[0][0];
}
// Hindi phrase separator, kanji, katakana, hiragana, CJK diacriticals,
// other letters, and digits
+ "$danda=[\u0964\u0965];"
+ "$kanji=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];"
+ "$kata=[\u3099-\u309c\u30a1-\u30fe];"
+ "$hira=[\u3041-\u309e\u30fc];"
+ "$let=[[[:L:][:Mc:]]-[$kanji$kata$hira]];"
+ "$dgt=[:N:];"
return new Object[][] {
// names of classes to instantiate for the different kinds of break
// iterator. Notice we're now using DictionaryBasedBreakIterator
// for word and line breaking.
{ "BreakIteratorClasses",
new String[] {
"RuleBasedBreakIterator",
// character-break iterator class
"DictionaryBasedBreakIterator",
// word-break iterator class
"DictionaryBasedBreakIterator",
// line-break iterator class
"RuleBasedBreakIterator" } // sentence-break iterator class
},
// punctuation that can occur in the middle of a word: currently
// dashes, apostrophes, and quotation marks
+ "$mid_word=[[:Pd:]\u00ad\u2027\\\"\\\'];"
// These are the same word-breaking rules as are specified in the default
// resource, except that the Latin letters, apostrophe, and hyphen are
// specified as dictionary characters
{
"WordBreakRules",
// ignore non-spacing marks, enclosing marks, and format characters,
// all of which should not influence the algorithm
"$_ignore_=[[:Mn:][:Me:][:Cf:]];"
// punctuation that can occur in the middle of a number: currently
// apostrophes, quotation marks, periods, commas, and the Arabic
// decimal point
+ "$mid_num=[\\\"\\\'\\,\u066b\\.];"
// lower and upper case Roman letters, apostrophe and dash are
// in the English dictionary
+"$_dictionary_=[a-zA-Z\\'\\-];"
// punctuation that can occur at the beginning of a number: currently
// the period, the number sign, and all currency symbols except the cents sign
+ "$pre_num=[[[:Sc:]-[\u00a2]]\\#\\.];"
// Hindi phrase separator, kanji, katakana, hiragana, CJK diacriticals,
// other letters, and digits
+"$danda=[\u0964\u0965];"
+ "$kanji=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];"
+ "$kata=[\u3099-\u309c\u30a1-\u30fe];"
+ "$hira=[\u3041-\u309e\u30fc];"
+ "$let=[[[:L:][:Mc:]]-[$kanji$kata$hira]];"
+ "$dgt=[:N:];"
// punctuation that can occur at the end of a number: currently
// the percent, per-thousand, per-ten-thousand, and Arabic percent
// signs, the cents sign, and the ampersand
+ "$post_num=[\\%\\&\u00a2\u066a\u2030\u2031];"
// punctuation that can occur in the middle of a word: currently
// dashes, apostrophes, and quotation marks
+"$mid_word=[[:Pd:]\u00ad\u2027\\\"\\\'];"
// line separators: currently LF, FF, PS, and LS
+ "$ls=[\n\u000c\u2028\u2029];"
// punctuation that can occur in the middle of a number: currently
// apostrophes, quotation marks, periods, commas, and the Arabic
// decimal point
+"$mid_num=[\\\"\\\'\\,\u066b\\.];"
// whitespace: all space separators and the tab character
+ "$ws=[[:Zs:]\t];"
// punctuation that can occur at the beginning of a number: currently
// the period, the number sign, and all currency symbols except the cents sign
+"$pre_num=[[[:Sc:]-[\u00a2]]\\#\\.];"
// a word is a sequence of letters that may contain internal
// punctuation, as long as it begins and ends with a letter and
// never contains two punctuation marks in a row
+ "$word=($let+($mid_word$let+)*$danda?);"
// punctuation that can occur at the end of a number: currently
// the percent, per-thousand, per-ten-thousand, and Arabic percent
// signs, the cents sign, and the ampersand
+"$post_num=[\\%\\&\u00a2\u066a\u2030\u2031];"
// a number is a sequence of digits that may contain internal
// punctuation, as long as it begins and ends with a digit and
// never contains two punctuation marks in a row.
+ "$number=($dgt+($mid_num$dgt+)*);"
// line separators: currently LF, FF, PS, and LS
+"$ls=[\n\u000c\u2028\u2029];"
// break after every character, with the following exceptions
// (this will cause punctuation marks that aren't considered
// part of words or numbers to be treated as words unto themselves)
+ ".;"
// whitespace: all space separators and the tab character
+"$ws=[[:Zs:]\t];"
// keep together any sequence of contiguous words and numbers
// (including just one of either), plus an optional trailing
// number-suffix character
+ "$word?($number$word)*($number$post_num?)?;"
// a word is a sequence of letters that may contain internal
// punctuation, as long as it begins and ends with a letter and
// never contains two punctuation marks in a row
+"$word=($let+($mid_word$let+)*$danda?);"
// keep together any sequence of contiguous words and numbers
// that starts with a number-prefix character and a number,
// and may end with a number-suffix character
+ "$pre_num($number$word)*($number$post_num?)?;"
// a number is a sequence of digits that may contain internal
// punctuation, as long as it begins and ends with a digit and
// never contains two punctuation marks in a row.
+"$number=($dgt+($mid_num$dgt+)*);"
// keep together runs of whitespace (optionally with a single trailing
// line separator or CRLF sequence)
+ "$ws*\r?$ls?;"
// break after every character, with the following exceptions
// (this will cause punctuation marks that aren't considered
// part of words or numbers to be treated as words unto themselves)
+".;"
// keep together runs of Katakana
+ "$kata*;"
// keep together any sequence of contiguous words and numbers
// (including just one of either), plus an optional trailing
// number-suffix character
+"$word?($number$word)*($number$post_num?)?;"
// keep together runs of Hiragana
+ "$hira*;"
// keep together any sequence of contiguous words and numbers
// that starts with a number-prefix character and a number,
// and may end with a number-suffix character
+"$pre_num($number$word)*($number$post_num?)?;"
// keep together runs of Kanji
+ "$kanji*;"},
// These are the same line-breaking rules as are specified in the default
// resource, except that the Latin letters, apostrophe, and hyphen are
// specified as dictionary characters
{ "LineBreakRules",
// ignore non-spacing marks, enclosing marks, and format characters
"$_ignore_=[[:Mn:][:Me:][:Cf:]];"
// keep together runs of whitespace (optionally with a single trailing
// line separator or CRLF sequence)
+"$ws*\r?$ls?;"
// lower and upper case Roman letters, apostrophe and dash
// are in the English dictionary
+ "$_dictionary_=[a-zA-Z\\'\\-];"
// keep together runs of Katakana
+"$kata*;"
// Hindi phrase separators
+ "$danda=[\u0964\u0965];"
// keep together runs of Hiragana
+"$hira*;"
// characters that always cause a break: ETX, tab, LF, FF, LS, and PS
+ "$break=[\u0003\t\n\f\u2028\u2029];"
// keep together runs of Kanji
+"$kanji*;" },
// characters that always prevent a break: the non-breaking space
// and similar characters
+ "$nbsp=[\u00a0\u2007\u2011\ufeff];"
// These are the same line-breaking rules as are specified in the default
// resource, except that the Latin letters, apostrophe, and hyphen are
// specified as dictionary characters
{ "LineBreakRules",
// ignore non-spacing marks, enclosing marks, and format characters
"$_ignore_=[[:Mn:][:Me:][:Cf:]];"
// whitespace: space separators and control characters, except for
// CR and the other characters mentioned above
+ "$space=[[[:Zs:][:Cc:]]-[$nbsp$break\r]];"
// lower and upper case Roman letters, apostrophe and dash
// are in the English dictionary
+"$_dictionary_=[a-zA-Z\\'\\-];"
// dashes: dash punctuation and the discretionary hyphen, except for
// non-breaking hyphens
+ "$dash=[[[:Pd:]\u00ad]-[$nbsp]];"
// Hindi phrase separators
+"$danda=[\u0964\u0965];"
// characters that stick to a word if they precede it: currency symbols
// (except the cents sign) and starting punctuation
+ "$pre_word=[[[:Sc:]-[\u00a2]][:Ps:]\\\"\\\'];"
// characters that always cause a break: ETX, tab, LF, FF, LS, and PS
+"$break=[\u0003\t\n\f\u2028\u2029];"
// characters that stick to a word if they follow it: ending punctuation,
// other punctuation that usually occurs at the end of a sentence,
// small Kana characters, some CJK diacritics, etc.
+ "$post_word=[[:Pe:]\\!\\\"\\\'\\%\\.\\,\\:\\;\\?\u00a2\u00b0\u066a\u2030-\u2034"
+ "\u2103\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063"
+ "\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9"
+ "\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0c"
+ "\uff0e\uff1f];"
// characters that always prevent a break: the non-breaking space
// and similar characters
+"$nbsp=[\u00a0\u2007\u2011\ufeff];"
// Kanji: actually includes both Kanji and Kana, except for small Kana and
// CJK diacritics
+ "$kanji=[[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa]-[$post_word$_ignore_]];"
// whitespace: space separators and control characters, except for
// CR and the other characters mentioned above
+"$space=[[[:Zs:][:Cc:]]-[$nbsp$break\r]];"
// digits
+ "$digit=[[:Nd:][:No:]];"
// dashes: dash punctuation and the discretionary hyphen, except for
// non-breaking hyphens
+"$dash=[[[:Pd:]\u00ad]-[$nbsp]];"
// punctuation that can occur in the middle of a number: periods and commas
+ "$mid_num=[\\.\\,];"
// characters that stick to a word if they precede it: currency symbols
// (except the cents sign) and starting punctuation
+"$pre_word=[[[:Sc:]-[\u00a2]][:Ps:]\\\"\\\'];"
// everything not mentioned above, plus the quote marks (which are both
// <pre-word>, <post-word>, and <char>)
+ "$char=[^$break$space$dash$kanji$nbsp$_ignore_$pre_word$post_word$mid_num$danda\r\\\"\\\'];"
// characters that stick to a word if they follow it: ending punctuation,
// other punctuation that usually occurs at the end of a sentence,
// small Kana characters, some CJK diacritics, etc.
+"$post_word=[[:Pe:]\\!\\\"\\\'\\%\\.\\,\\:\\;\\?\u00a2\u00b0\u066a\u2030-\u2034"
+ "\u2103\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063"
+ "\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9"
+ "\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0c"
+ "\uff0e\uff1f];"
// a "number" is a run of prefix characters and dashes, followed by one or
// more digits with isolated number-punctuation characters interspersed
+ "$number=([$pre_word$dash]*$digit+($mid_num$digit+)*);"
// Kanji: actually includes both Kanji and Kana, except for small Kana and
// CJK diacritics
+"$kanji=[[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa]-[$post_word$_ignore_]];"
// the basic core of a word can be either a "number" as defined above, a single
// "Kanji" character, or a run of any number of not-explicitly-mentioned
// characters (this includes Latin letters)
+ "$word_core=([$pre_word$char]*|$kanji|$number);"
// digits
+"$digit=[[:Nd:][:No:]];"
// a word may end with an optional suffix that may be either a run of one or
// more dashes or a run of word-suffix characters, followed by an optional
// run of whitespace
+ "$word_suffix=(($dash+|$post_word*)$space*);"
// punctuation that can occur in the middle of a number: periods and commas
+"$mid_num=[\\.\\,];"
// a word, thus, is an optional run of word-prefix characters, followed by
// a word core and a word suffix (the syntax of <word-core> and <word-suffix>
// actually allows either of them to match the empty string, putting a break
// between things like ")(" or "aaa(aaa"
+ "$word=($pre_word*$word_core$word_suffix);"
// everything not mentioned above, plus the quote marks (which are both
// <pre-word>, <post-word>, and <char>)
+"$char=[^$break$space$dash$kanji$nbsp$_ignore_$pre_word$post_word$mid_num$danda\r\\\"\\\'];"
// finally, the rule that does the work: Keep together any run of words that
// are joined by runs of one or more non-breaking characters ($nbsp). Also keep a trailing
// line-break character or CRLF combination with the word. (line separators
// "win" over nbsp's)
+ "$word($nbsp+$word)*\r?$break?;" },
// these two resources specify the pathnames of the dictionary files to
// use for word breaking and line breaking. Both currently refer to
// a file called english.dict placed in com.ibm.icu.impl.data
// somewhere in the class path. It's important to note that
// english.dict was created for testing purposes only, and doesn't
// come anywhere close to being an exhaustive dictionary of English
// words (basically, it contains all the words in the Declaration of
// Independence, and the Revised Standard Version of the book of Genesis,
// plus a few other words thrown in to show more interesting cases).
// { "WordBreakDictionary", "com\\ibm\\text\\resources\\english.dict" },
// { "LineBreakDictionary", "com\\ibm\\text\\resources\\english.dict" }
{ "WordBreakDictionary", url },
{ "LineBreakDictionary", url }
};
// a "number" is a run of prefix characters and dashes, followed by one or
// more digits with isolated number-punctuation characters interspersed
+"$number=([$pre_word$dash]*$digit+($mid_num$digit+)*);"
// the basic core of a word can be either a "number" as defined above, a single
// "Kanji" character, or a run of any number of not-explicitly-mentioned
// characters (this includes Latin letters)
+"$word_core=([$pre_word$char]*|$kanji|$number);"
// a word may end with an optional suffix that may be either a run of one or
// more dashes or a run of word-suffix characters, followed by an optional
// run of whitespace
+"$word_suffix=(($dash+|$post_word*)$space*);"
// a word, thus, is an optional run of word-prefix characters, followed by
// a word core and a word suffix (the syntax of <word-core> and <word-suffix>
// actually allows either of them to match the empty string, putting a break
// between things like ")(" or "aaa(aaa"
+"$word=($pre_word*$word_core$word_suffix);"
// finally, the rule that does the work: Keep together any run of words that
// are joined by runs of one or more non-breaking characters ($nbsp). Also keep a trailing
// line-break character or CRLF combination with the word. (line separators
// "win" over nbsp's)
+"$word($nbsp+$word)*\r?$break?;" },
// these two resources specify the pathnames of the dictionary files to
// use for word breaking and line breaking. Both currently refer to
// a file called english.dict placed in com.ibm.icu.impl.data
// somewhere in the class path. It's important to note that
// english.dict was created for testing purposes only, and doesn't
// come anywhere close to being an exhaustive dictionary of English
// words (basically, it contains all the words in the Declaration of
// Independence, and the Revised Standard Version of the book of Genesis,
// plus a few other words thrown in to show more interesting cases).
// { "WordBreakDictionary", "com\\ibm\\text\\resources\\english.dict" },
// { "LineBreakDictionary", "com\\ibm\\text\\resources\\english.dict" }
{ "WordBreakDictionary", DATA_NAME },
{ "LineBreakDictionary", DATA_NAME }
};
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/TestFmwk.java,v $
* $Date: 2004/01/27 23:13:13 $
* $Revision: 1.59 $
* $Date: 2004/02/06 21:54:04 $
* $Revision: 1.60 $
*
*****************************************************************************************
*/
@ -224,7 +224,6 @@ public class TestFmwk extends AbstractTestLog {
if (name.indexOf('.') == -1) {
name = defaultPackage + name;
}
try {
Class cls = Class.forName(name);
if (!TestFmwk.class.isAssignableFrom(cls)) {

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/TestUtil.java,v $
* $Date: 2003/11/24 22:13:51 $
* $Revision: 1.10 $
* $Date: 2004/02/06 21:54:05 $
* $Revision: 1.11 $
*
*******************************************************************************
*/
@ -18,172 +18,221 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
public class TestUtil {
/**
* Standard path to the test data.
*/
public static final String DATA_PATH = "/src/com/ibm/icu/dev/data/";
public final class TestUtil {
/**
* Path to test data in icu4jtest.jar
*/
public static final String LOCAL_DATA_PATH = "/com/ibm/icu/dev/data/";
/**
* Property for user-defined data path.
*/
public static final String DATA_PATH_PROPERTY = "ICUDataPath";
/**
* Standard path to the test data in the file system.
*/
public static final String DATA_PATH = "/src" + LOCAL_DATA_PATH;
/**
* Property for user-defined data path.
*/
public static final String DATA_PATH_PROPERTY = "ICUDataPath";
/**
* Property for modular build.
*/
public static final String DATA_MODULAR_BUILD_PROPERTY = "ICUModularBuild";
/**
* Property for modular build.
*/
public static final String DATA_MODULAR_BUILD_PROPERTY = "ICUModularBuild";
/**
* Compute a full data path using the ICUDataPath, if defined, or the user.dir, if we
* are allowed access to it.
*/
private static final String dataPath(String fileName) {
String s = System.getProperty(DATA_PATH_PROPERTY);
if (s == null) {
// assume user.dir is directly above src directory
// data path must end in '/' or '\', fileName should not start with one
s = System.getProperty("user.dir"); // protected property
s = s + DATA_PATH;
}
return s + fileName;
}
/**
 * Build the full path of a data file. The root directory is taken from the
 * ICUDataPath system property when it is set; otherwise it is derived from
 * user.dir, which is assumed to sit directly above the src directory.
 * The root must end in '/' or '\' and fileName should not start with one.
 *
 * @param fileName path of the data file relative to the data directory
 * @return the root directory with fileName appended
 */
private static final String dataPath(String fileName) {
    final String configuredRoot = System.getProperty(DATA_PATH_PROPERTY);
    if (configuredRoot != null) {
        return configuredRoot + fileName;
    }
    // user.dir is a protected property; reading it may be refused when a
    // security manager is installed.
    final String workingDir = System.getProperty("user.dir");
    return workingDir + DATA_PATH + fileName;
}
/**
* Return a buffered reader on the data file at path 'name' rooted at the data path.
*/
public static final BufferedReader getDataReader(String name) throws IOException {
InputStream is = new FileInputStream(dataPath(name));
InputStreamReader isr = new InputStreamReader(is);
return new BufferedReader(isr);
}
/**
 * Return an input stream on the data file at path 'name' rooted at the data path.
 * The file system location computed by dataPath is tried first. If the file cannot
 * be opened there (it is missing, or access is refused), the data is looked up as a
 * class resource under LOCAL_DATA_PATH instead.
 *
 * @param name path of the data file relative to the data directory
 * @return an open stream on the data, never null
 * @throws IOException if the data is found neither in the file system nor as a
 *         class resource; the file system failure is attached as the cause
 */
public static final InputStream getDataStream(String name) throws IOException {
    Exception fileFailure;
    try {
        return new FileInputStream(dataPath(name));
    } catch (Exception e) {
        // Covers FileNotFoundException and the SecurityException raised when
        // file or property access is denied; Errors are left to propagate.
        fileFailure = e;
    }

    // getResourceAsStream signals a missing resource by returning null rather
    // than by throwing, so the result has to be checked explicitly.
    InputStream is = TestUtil.class.getResourceAsStream(LOCAL_DATA_PATH + name);
    if (is == null) {
        IOException ex =
            new IOException("data resource '" + name + "' not found");
        ex.initCause(fileFailure);
        throw ex;
    }
    return is;
}
/**
* Return a buffered reader on the data file at path 'name' rooted at the data path,
* using the provided encoding.
*/
public static final BufferedReader getDataReader(String name, String charset) throws IOException {
InputStream is = new FileInputStream(dataPath(name));
InputStreamReader isr = new InputStreamReader(is, charset);
return new BufferedReader(isr);
}
/**
 * Return a buffered reader on the data file at path 'name' rooted at the data path,
 * using the provided encoding.
 *
 * @param name path of the data file relative to the data directory
 * @param charset name of the encoding to decode with, or null to use the
 *        platform default encoding
 * @return a buffered reader on the data
 * @throws IOException if the data cannot be opened or the encoding is not supported
 */
public static final BufferedReader getDataReader(String name, String charset) throws IOException {
    InputStream is = getDataStream(name);
    if (is == null) {
        throw new IOException("data resource '" + name + "' not found");
    }
    try {
        InputStreamReader isr =
            charset == null
                ? new InputStreamReader(is)
                : new InputStreamReader(is, charset);
        return new BufferedReader(isr);
    } catch (IOException e) {
        // An unsupported charset name lands here; release the stream that
        // was already opened before reporting the failure.
        try {
            is.close();
        } catch (IOException ignored) {
            // the original failure is the one worth reporting
        }
        throw e;
    }
}
/**
* Return an input stream on the data file at path 'name' rooted at the data path
*/
public static final InputStream getDataStream(String name) throws IOException{
return new FileInputStream(dataPath(name));
}
/**
 * Return a buffered reader on the data file at path 'name' rooted at the data path.
 * No encoding is specified: this delegates to the two-argument overload with a
 * null charset.
 *
 * @param name path of the data file relative to the data directory
 * @return a buffered reader on the data
 * @throws IOException if the data cannot be opened
 */
public static final BufferedReader getDataReader(String name)
    throws IOException {
    return getDataReader(name, null);
}
static final char DIGITS[] = {
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
'U', 'V', 'W', 'X', 'Y', 'Z'
};
/**
* Return true if the character is NOT printable ASCII. The tab,
* newline and linefeed characters are considered unprintable.
*/
public static boolean isUnprintable(int c) {
return !(c >= 0x20 && c <= 0x7E);
}
/**
* Escape unprintable characters using <backslash>uxxxx notation
* for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and
* above. If the character is printable ASCII, then do nothing
* and return FALSE. Otherwise, append the escaped notation and
* return TRUE.
*/
public static boolean escapeUnprintable(StringBuffer result, int c) {
if (isUnprintable(c)) {
result.append('\\');
if ((c & ~0xFFFF) != 0) {
result.append('U');
result.append(DIGITS[0xF&(c>>28)]);
result.append(DIGITS[0xF&(c>>24)]);
result.append(DIGITS[0xF&(c>>20)]);
result.append(DIGITS[0xF&(c>>16)]);
} else {
result.append('u');
}
result.append(DIGITS[0xF&(c>>12)]);
result.append(DIGITS[0xF&(c>>8)]);
result.append(DIGITS[0xF&(c>>4)]);
result.append(DIGITS[0xF&c]);
return true;
}
return false;
}
static final char DIGITS[] =
{
'0',
'1',
'2',
'3',
'4',
'5',
'6',
'7',
'8',
'9',
'A',
'B',
'C',
'D',
'E',
'F',
'G',
'H',
'I',
'J',
'K',
'L',
'M',
'N',
'O',
'P',
'Q',
'R',
'S',
'T',
'U',
'V',
'W',
'X',
'Y',
'Z' };
/**
 * Return true if the character is NOT printable ASCII, i.e. lies outside
 * the range U+0020..U+007E. Control characters such as tab, carriage
 * return and line feed are therefore considered unprintable.
 *
 * @param c the code point to test
 * @return true when c is below 0x20 or above 0x7E
 */
public static boolean isUnprintable(int c) {
    return c < 0x20 || c > 0x7E;
}
/**
 * Append an escaped form of an unprintable character to a buffer.
 * Values that fit in 16 bits are written as <backslash>uxxxx; anything
 * with bits set above the low 16 is written as <backslash>Uxxxxxxxx.
 * Printable ASCII is left alone: nothing is appended.
 *
 * @param result buffer that receives the escape sequence
 * @param c the code point to examine
 * @return true if an escape was appended, false if c is printable ASCII
 */
public static boolean escapeUnprintable(StringBuffer result, int c) {
    if (!isUnprintable(c)) {
        return false;
    }
    final boolean wide = (c & ~0xFFFF) != 0;
    result.append('\\');
    result.append(wide ? 'U' : 'u');
    // Emit one hex digit per nibble, most significant first: eight
    // digits for the wide form, four otherwise.
    for (int shift = wide ? 28 : 12; shift >= 0; shift -= 4) {
        result.append(DIGITS[0xF & (c >> shift)]);
    }
    return true;
}
static class Lock {
private int count;
static class Lock {
private int count;
synchronized void inc() {
++count;
}
synchronized void inc() {
++count;
}
synchronized void dec() {
--count;
}
synchronized void dec() {
--count;
}
synchronized int count() {
return count;
}
synchronized int count() {
return count;
}
void go() {
try {
while (count() > 0) {
synchronized(this) {
notifyAll();
}
Thread.sleep(50);
}
}
catch (InterruptedException e) {
}
}
}
/**
 * Repeatedly wake every thread waiting on this lock until count() drops
 * to zero, pausing 50 ms between rounds. The notification is repeated
 * because a thread that has not reached its wait() yet would miss a
 * single notifyAll().
 * If the calling thread is interrupted the loop stops early and the
 * thread's interrupt status is restored so the caller can observe it.
 */
void go() {
    try {
        while (count() > 0) {
            synchronized (this) {
                notifyAll();
            }
            Thread.sleep(50);
        }
    } catch (InterruptedException e) {
        // Thread.sleep cleared the flag when it threw; set it again
        // instead of silently discarding the interruption.
        Thread.currentThread().interrupt();
    }
}
}
static class TestThread extends Thread {
Lock lock;
Runnable target;
static class TestThread extends Thread {
Lock lock;
Runnable target;
TestThread(Lock lock, Runnable target) {
this.lock = lock;
this.target = target;
TestThread(Lock lock, Runnable target) {
this.lock = lock;
this.target = target;
lock.inc();
}
public void run() {
try {
synchronized (lock) {
lock.wait();
}
target.run();
}
catch (InterruptedException e) {
}
lock.inc();
}
lock.dec();
}
}
public void run() {
try {
synchronized (lock) {
lock.wait();
}
target.run();
} catch (InterruptedException e) {
}
public static void runUntilDone(Runnable[] targets) {
if (targets == null) {
throw new IllegalArgumentException("targets is null");
}
if (targets.length == 0) {
return;
}
lock.dec();
}
}
Lock lock = new Lock();
for (int i = 0; i < targets.length; ++i) {
new TestThread(lock, targets[i]).start();
}
public static void runUntilDone(Runnable[] targets) {
if (targets == null) {
throw new IllegalArgumentException("targets is null");
}
if (targets.length == 0) {
return;
}
lock.go();
}
Lock lock = new Lock();
for (int i = 0; i < targets.length; ++i) {
new TestThread(lock, targets[i]).start();
}
lock.go();
}
}

View file

@ -0,0 +1,14 @@
Manifest-Version: 1.0
Class-Path: icu4j.jar
Main-Class: com.ibm.icu.dev.test.TestAll
Name: com/ibm/icu/
Specification-Title: ICU for Java Tests
Specification-Version: 2.8
Specification-Vendor: ICU
Implementation-Title: ICU for Java
Implementation-Version: 2.8.0
Implementation-Vendor: IBM Corporation
Implementation-Vendor-Id: com.ibm
Copyright-Info: Copyright (c) 2000-2004, International Business Machines Corporation and others. All Rights Reserved.
Sealed: false

View file

@ -82,17 +82,10 @@ class NormalizerBuilder {
*/
private static void readExclusionList(BitSet isExcluded) throws java.io.IOException {
if (DEBUG) System.out.println("Reading Exclusions");
//BufferedReader in = new BufferedReader(new FileReader(COMPOSITION_EXCLUSIONS), 5*1024);
BufferedReader in = null;
try {
in = TestUtil.getDataReader("unicode/CompositionExclusions.txt");
} catch (Exception e) {
System.err.println("Fail to read the file CompositionExclusions.txt!");
System.exit(1);
}
BufferedReader in = TestUtil.getDataReader("unicode/CompositionExclusions.txt");
while (true) {
// read a line, discarding comments and blank lines
String line = in.readLine();
@ -511,4 +504,4 @@ class NormalizerBuilder {
}
return result.toString();
}
}
}

View file

@ -5,15 +5,14 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/rbbi/BreakIteratorRules_en_US_TEST.java,v $
* $Date: 2002/02/16 03:05:14 $
* $Revision: 1.8 $
* $Date: 2004/02/06 21:53:59 $
* $Revision: 1.9 $
*
*****************************************************************************************
*/
package com.ibm.icu.dev.test.rbbi;
import java.util.ListResourceBundle;
import java.net.URL;
/**
* This resource bundle is included for testing and demonstration purposes only.
@ -22,201 +21,205 @@ import java.net.URL;
* with good resource data (and a good dictionary file) for Thai
*/
public class BreakIteratorRules_en_US_TEST extends ListResourceBundle {
private static final URL url =
BreakIteratorRules_en_US_TEST.class.getResource("/com/ibm/data/misc/english.dict");
private static final String DATA_NAME = "/com/ibm/icu/dev/data/rbbi/english.dict";
public Object[][] getContents() {
return contents;
}
// calling code will handle case where dictionary does not exist
static final Object[][] contents = {
// names of classes to instantiate for the different kinds of break
// iterator. Notice we're now using DictionaryBasedBreakIterator
// for word and line breaking.
{ "BreakIteratorClasses",
new String[] { "RuleBasedBreakIterator", // character-break iterator class
"DictionaryBasedBreakIterator", // word-break iterator class
"DictionaryBasedBreakIterator", // line-break iterator class
"RuleBasedBreakIterator" } // sentence-break iterator class
},
public Object[][] getContents() {
return new Object[][] {
// names of classes to instantiate for the different kinds of break
// iterator. Notice we're now using DictionaryBasedBreakIterator
// for word and line breaking.
{ "BreakIteratorClasses",
new String[] {
"RuleBasedBreakIterator",
// character-break iterator class
"DictionaryBasedBreakIterator",
// word-break iterator class
"DictionaryBasedBreakIterator",
// line-break iterator class
"RuleBasedBreakIterator" } // sentence-break iterator class
},
// These are the same word-breaking rules as are specified in the default
// resource, except that the Latin letters, apostrophe, and hyphen are
// specified as dictionary characters
{ "WordBreakRules",
// ignore non-spacing marks, enclosing marks, and format characters,
// all of which should not influence the algorithm
"$_ignore_=[[:Mn:][:Me:][:Cf:]];"
// These are the same word-breaking rules as are specified in the default
// resource, except that the Latin letters, apostrophe, and hyphen are
// specified as dictionary characters
{
"WordBreakRules",
// ignore non-spacing marks, enclosing marks, and format characters,
// all of which should not influence the algorithm
"$_ignore_=[[:Mn:][:Me:][:Cf:]];"
// lower and upper case Roman letters, apostrophy and dash are
// in the English dictionary
+ "$_dictionary_=[a-zA-Z\\'\\-];"
// lower and upper case Roman letters, apostrophe and dash are
// in the English dictionary
+"$_dictionary_=[a-zA-Z\\'\\-];"
// Hindi phrase separator, kanji, katakana, hiragana, CJK diacriticals,
// other letters, and digits
+ "$danda=[\u0964\u0965];"
+ "$kanji=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];"
+ "$kata=[\u3099-\u309c\u30a1-\u30fe];"
+ "$hira=[\u3041-\u309e\u30fc];"
+ "$let=[[[:L:][:Mc:]]-[$kanji$kata$hira]];"
+ "$dgt=[:N:];"
// Hindi phrase separator, kanji, katakana, hiragana, CJK diacriticals,
// other letters, and digits
+"$danda=[\u0964\u0965];"
+ "$kanji=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];"
+ "$kata=[\u3099-\u309c\u30a1-\u30fe];"
+ "$hira=[\u3041-\u309e\u30fc];"
+ "$let=[[[:L:][:Mc:]]-[$kanji$kata$hira]];"
+ "$dgt=[:N:];"
// punctuation that can occur in the middle of a word: currently
// dashes, apostrophes, and quotation marks
+ "$mid_word=[[:Pd:]\u00ad\u2027\\\"\\\'];"
// punctuation that can occur in the middle of a word: currently
// dashes, apostrophes, and quotation marks
+"$mid_word=[[:Pd:]\u00ad\u2027\\\"\\\'];"
// punctuation that can occur in the middle of a number: currently
// apostrophes, qoutation marks, periods, commas, and the Arabic
// decimal point
+ "$mid_num=[\\\"\\\'\\,\u066b\\.];"
// punctuation that can occur in the middle of a number: currently
// apostrophes, quotation marks, periods, commas, and the Arabic
// decimal point
+"$mid_num=[\\\"\\\'\\,\u066b\\.];"
// punctuation that can occur at the beginning of a number: currently
// the period, the number sign, and all currency symbols except the cents sign
+ "$pre_num=[[[:Sc:]-[\u00a2]]\\#\\.];"
// punctuation that can occur at the beginning of a number: currently
// the period, the number sign, and all currency symbols except the cents sign
+"$pre_num=[[[:Sc:]-[\u00a2]]\\#\\.];"
// punctuation that can occur at the end of a number: currently
// the percent, per-thousand, per-ten-thousand, and Arabic percent
// signs, the cents sign, and the ampersand
+ "$post_num=[\\%\\&\u00a2\u066a\u2030\u2031];"
// punctuation that can occur at the end of a number: currently
// the percent, per-thousand, per-ten-thousand, and Arabic percent
// signs, the cents sign, and the ampersand
+"$post_num=[\\%\\&\u00a2\u066a\u2030\u2031];"
// line separators: currently LF, FF, PS, and LS
+ "$ls=[\n\u000c\u2028\u2029];"
// line separators: currently LF, FF, PS, and LS
+"$ls=[\n\u000c\u2028\u2029];"
// whitespace: all space separators and the tab character
+ "$ws=[[:Zs:]\t];"
// whitespace: all space separators and the tab character
+"$ws=[[:Zs:]\t];"
// a word is a sequence of letters that may contain internal
// punctuation, as long as it begins and ends with a letter and
// never contains two punctuation marks in a row
+ "$word=($let+($mid_word$let+)*$danda?);"
// a word is a sequence of letters that may contain internal
// punctuation, as long as it begins and ends with a letter and
// never contains two punctuation marks in a row
+"$word=($let+($mid_word$let+)*$danda?);"
// a number is a sequence of digits that may contain internal
// punctuation, as long as it begins and ends with a digit and
// never contains two punctuation marks in a row.
+ "$number=($dgt+($mid_num$dgt+)*);"
// a number is a sequence of digits that may contain internal
// punctuation, as long as it begins and ends with a digit and
// never contains two punctuation marks in a row.
+"$number=($dgt+($mid_num$dgt+)*);"
// break after every character, with the following exceptions
// (this will cause punctuation marks that aren't considered
// part of words or numbers to be treated as words unto themselves)
+ ".;"
// break after every character, with the following exceptions
// (this will cause punctuation marks that aren't considered
// part of words or numbers to be treated as words unto themselves)
+".;"
// keep together any sequence of contiguous words and numbers
// (including just one of either), plus an optional trailing
// number-suffix character
+ "$word?($number$word)*($number$post_num?)?;"
// keep together any sequence of contiguous words and numbers
// (including just one of either), plus an optional trailing
// number-suffix character
+"$word?($number$word)*($number$post_num?)?;"
// keep together and sequence of contiguous words and numbers
// that starts with a number-prefix character and a number,
// and may end with a number-suffix character
+ "$pre_num($number$word)*($number$post_num?)?;"
// keep together any sequence of contiguous words and numbers
// that starts with a number-prefix character and a number,
// and may end with a number-suffix character
+"$pre_num($number$word)*($number$post_num?)?;"
// keep together runs of whitespace (optionally with a single trailing
// line separator or CRLF sequence)
+ "$ws*\r?$ls?;"
// keep together runs of whitespace (optionally with a single trailing
// line separator or CRLF sequence)
+"$ws*\r?$ls?;"
// keep together runs of Katakana
+ "$kata*;"
// keep together runs of Katakana
+"$kata*;"
// keep together runs of Hiragana
+ "$hira*;"
// keep together runs of Hiragana
+"$hira*;"
// keep together runs of Kanji
+ "$kanji*;"},
// keep together runs of Kanji
+"$kanji*;" },
// These are the same line-breaking rules as are specified in the default
// resource, except that the Latin letters, apostrophe, and hyphen are
// specified as dictionary characters
{ "LineBreakRules",
// ignore non-spacing marks, enclosing marks, and format characters
"$_ignore_=[[:Mn:][:Me:][:Cf:]];"
// These are the same line-breaking rules as are specified in the default
// resource, except that the Latin letters, apostrophe, and hyphen are
// specified as dictionary characters
{ "LineBreakRules",
// ignore non-spacing marks, enclosing marks, and format characters
"$_ignore_=[[:Mn:][:Me:][:Cf:]];"
// lower and upper case Roman letters, apostrophy and dash
// are in the English dictionary
+ "$_dictionary_=[a-zA-Z\\'\\-];"
// lower and upper case Roman letters, apostrophe and dash
// are in the English dictionary
+"$_dictionary_=[a-zA-Z\\'\\-];"
// Hindi phrase separators
+ "$danda=[\u0964\u0965];"
// Hindi phrase separators
+"$danda=[\u0964\u0965];"
// characters that always cause a break: ETX, tab, LF, FF, LS, and PS
+ "$break=[\u0003\t\n\f\u2028\u2029];"
// characters that always cause a break: ETX, tab, LF, FF, LS, and PS
+"$break=[\u0003\t\n\f\u2028\u2029];"
// characters that always prevent a break: the non-breaking space
// and similar characters
+ "$nbsp=[\u00a0\u2007\u2011\ufeff];"
// characters that always prevent a break: the non-breaking space
// and similar characters
+"$nbsp=[\u00a0\u2007\u2011\ufeff];"
// whitespace: space separators and control characters, except for
// CR and the other characters mentioned above
+ "$space=[[[:Zs:][:Cc:]]-[$nbsp$break\r]];"
// whitespace: space separators and control characters, except for
// CR and the other characters mentioned above
+"$space=[[[:Zs:][:Cc:]]-[$nbsp$break\r]];"
// dashes: dash punctuation and the discretionary hyphen, except for
// non-breaking hyphens
+ "$dash=[[[:Pd:]\u00ad]-[$nbsp]];"
// dashes: dash punctuation and the discretionary hyphen, except for
// non-breaking hyphens
+"$dash=[[[:Pd:]\u00ad]-[$nbsp]];"
// characters that stick to a word if they precede it: currency symbols
// (except the cents sign) and starting punctuation
+ "$pre_word=[[[:Sc:]-[\u00a2]][:Ps:]\\\"\\\'];"
// characters that stick to a word if they precede it: currency symbols
// (except the cents sign) and starting punctuation
+"$pre_word=[[[:Sc:]-[\u00a2]][:Ps:]\\\"\\\'];"
// characters that stick to a word if they follow it: ending punctuation,
// other punctuation that usually occurs at the end of a sentence,
// small Kana characters, some CJK diacritics, etc.
+ "$post_word=[[:Pe:]\\!\\\"\\\'\\%\\.\\,\\:\\;\\?\u00a2\u00b0\u066a\u2030-\u2034"
+ "\u2103\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063"
+ "\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9"
+ "\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0c"
+ "\uff0e\uff1f];"
// characters that stick to a word if they follow it: ending punctuation,
// other punctuation that usually occurs at the end of a sentence,
// small Kana characters, some CJK diacritics, etc.
+"$post_word=[[:Pe:]\\!\\\"\\\'\\%\\.\\,\\:\\;\\?\u00a2\u00b0\u066a\u2030-\u2034"
+ "\u2103\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063"
+ "\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9"
+ "\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0c"
+ "\uff0e\uff1f];"
// Kanji: actually includes both Kanji and Kana, except for small Kana and
// CJK diacritics
+ "$kanji=[[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa]-[$post_word$_ignore_]];"
// Kanji: actually includes both Kanji and Kana, except for small Kana and
// CJK diacritics
+"$kanji=[[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa]-[$post_word$_ignore_]];"
// digits
+ "$digit=[[:Nd:][:No:]];"
// digits
+"$digit=[[:Nd:][:No:]];"
// punctuation that can occur in the middle of a number: periods and commas
+ "$mid_num=[\\.\\,];"
// punctuation that can occur in the middle of a number: periods and commas
+"$mid_num=[\\.\\,];"
// everything not mentioned above, plus the quote marks (which are both
// <pre-word>, <post-word>, and <char>)
+ "$char=[^$break$space$dash$kanji$nbsp$_ignore_$pre_word$post_word$mid_num$danda\r\\\"\\\'];"
// everything not mentioned above, plus the quote marks (which are both
// <pre-word>, <post-word>, and <char>)
+"$char=[^$break$space$dash$kanji$nbsp$_ignore_$pre_word$post_word$mid_num$danda\r\\\"\\\'];"
// a "number" is a run of prefix characters and dashes, followed by one or
// more digits with isolated number-punctuation characters interspersed
+ "$number=([$pre_word$dash]*$digit+($mid_num$digit+)*);"
// a "number" is a run of prefix characters and dashes, followed by one or
// more digits with isolated number-punctuation characters interspersed
+"$number=([$pre_word$dash]*$digit+($mid_num$digit+)*);"
// the basic core of a word can be either a "number" as defined above, a single
// "Kanji" character, or a run of any number of not-explicitly-mentioned
// characters (this includes Latin letters)
+ "$word_core=([$pre_word$char]*|$kanji|$number);"
// the basic core of a word can be either a "number" as defined above, a single
// "Kanji" character, or a run of any number of not-explicitly-mentioned
// characters (this includes Latin letters)
+"$word_core=([$pre_word$char]*|$kanji|$number);"
// a word may end with an optional suffix that be either a run of one or
// more dashes or a run of word-suffix characters, followed by an optional
// run of whitespace
+ "$word_suffix=(($dash+|$post_word*)$space*);"
// a word may end with an optional suffix that can be either a run of one or
// more dashes or a run of word-suffix characters, followed by an optional
// run of whitespace
+"$word_suffix=(($dash+|$post_word*)$space*);"
// a word, thus, is an optional run of word-prefix characters, followed by
// a word core and a word suffix (the syntax of <word-core> and <word-suffix>
// actually allows either of them to match the empty string, putting a break
// between things like ")(" or "aaa(aaa"
+ "$word=($pre_word*$word_core$word_suffix);"
// a word, thus, is an optional run of word-prefix characters, followed by
// a word core and a word suffix (the syntax of <word-core> and <word-suffix>
// actually allows either of them to match the empty string, putting a break
// between things like ")(" or "aaa(aaa"
+"$word=($pre_word*$word_core$word_suffix);"
// finally, the rule that does the work: Keep together any run of words that
// are joined by runs of one of more non-spacing mark. Also keep a trailing
// line-break character or CRLF combination with the word. (line separators
// "win" over nbsp's)
+ "$word($nbsp+$word)*\r?$break?;" },
// finally, the rule that does the work: Keep together any run of words that
// are joined by runs of one or more non-breaking characters ($nbsp). Also keep a
// trailing line-break character or CRLF combination with the word. (line separators
// "win" over nbsp's)
+"$word($nbsp+$word)*\r?$break?;" },
// these two resources specify the pathnames of the dictionary files to
// use for word breaking and line breaking. Both currently refer to
// a file called english.dict placed in com.ibm.icu.impl.data
// somewhere in the class path. It's important to note that
// english.dict was created for testing purposes only, and doesn't
// come anywhere close to being an exhaustive dictionary of English
// words (basically, it contains all the words in the Declaration of
// Independence, and the Revised Standard Version of the book of Genesis,
// plus a few other words thrown in to show more interesting cases).
// { "WordBreakDictionary", "com\\ibm\\text\\resources\\english.dict" },
// { "LineBreakDictionary", "com\\ibm\\text\\resources\\english.dict" }
{ "WordBreakDictionary", url },
{ "LineBreakDictionary", url }
};
// these two resources specify the resource paths of the dictionary files to
// use for word breaking and line breaking. Both currently refer to
// english.dict, expected at /com/ibm/icu/dev/data/rbbi/english.dict
// on the class path. It's important to note that
// english.dict was created for testing purposes only, and doesn't
// come anywhere close to being an exhaustive dictionary of English
// words (basically, it contains all the words in the Declaration of
// Independence, and the Revised Standard Version of the book of Genesis,
// plus a few other words thrown in to show more interesting cases).
// { "WordBreakDictionary", "com\\ibm\\text\\resources\\english.dict" },
// { "LineBreakDictionary", "com\\ibm\\text\\resources\\english.dict" }
{ "WordBreakDictionary", DATA_NAME },
{ "LineBreakDictionary", DATA_NAME }
};
}
}

View file

@ -5,16 +5,54 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/rbbi/SimpleBITest.java,v $
* $Date: 2003/06/03 18:49:30 $
* $Revision: 1.8 $
* $Date: 2004/02/06 21:53:59 $
* $Revision: 1.9 $
*
*****************************************************************************************
*/
package com.ibm.icu.dev.test.rbbi;
import java.util.Locale;
import com.ibm.icu.text.BreakIterator;
import java.io.IOException;
import java.io.InputStream;
import java.util.ListResourceBundle;
import java.util.MissingResourceException;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.DictionaryBasedBreakIterator;
import com.ibm.icu.text.RuleBasedBreakIterator;
// TODO: {dlf} this test currently doesn't test anything!
// You'll notice that the resource that uses the dictionary isn't even on the resource path,
// so the dictionary never gets used. Good thing, too, because it would throw a security
// exception if run with a security manager. Not that it would matter, the dictionary
// resource isn't even in the icu source tree!
// In order to fix this:
// 1) make sure english.dict matches the current dictionary format required by dbbi
// 2) make sure english.dict gets included in icu4jtests.jar
// 3) have this test use getResourceAsStream to get a stream on the dictionary, and
// directly instantiate a DictionaryBasedBreakIterator. It can use the rules from
// the appropriate section of BreakIteratorRules_en_US_TEST. I'd suggest just copying
// the rules into this file.
// 4) change the test text by inserting '|' at word breaks, and '||' at line breaks.
// 5) process this text to a) create tables of break indices, and b) clean up the test
// for the break iterator to work on
//
// This would NOT test the ability to load dictionary-based break iterators through our
// normal resource mechanism. One could install such a break iterator and its
// resources into the icu4j jar, and it would work, but there's no way to register entire
// resources from outside yet. Even if there were, the access restrictions are a bit
// difficult to manage, if one wanted to register a break iterator whose code and data
// resides outside the icu4j jar. Since the code to instantiate would be going through
// two protection domains, each domain would have to allow access to the data-- but
// icu4j's domain wouldn't know about ours. So we could instantiate before registering
// the break iterator, but this would mean we'd have to fully initialize the dictionary(s)
// at instantiation time, rather than let this be deferred until they are actually needed.
//
// I've done items 2 and 3 above. Unfortunately, since I haven't done item 1, the
// dictionary builder crashes. So for now I'm disabling this test. This is not
// that important, since we have a Thai dictionary that we do test thoroughly.
//
public class SimpleBITest extends TestFmwk{
public static final String testText =
@ -103,21 +141,78 @@ public class SimpleBITest extends TestFmwk{
/**
 * Command-line entry point: creates a SimpleBITest and passes the
 * arguments to its run method.
 *
 * @param args command-line arguments, forwarded unchanged to run
 * @throws Exception if run throws
 */
public static void main(String[] args) throws Exception {
new SimpleBITest().run(args);
}
/**
 * Always returns false. Per the notes at the top of this file, the test is
 * disabled for now because english.dict does not match the dictionary format
 * the builder expects.
 * NOTE(review): presumably the test framework skips a test whose validate()
 * returns false -- confirm against TestFmwk.
 *
 * @return false, unconditionally
 */
protected boolean validate() {
// TODO: remove when english.dict gets fixed
return false;
}
/**
 * Creates a break iterator of the requested kind directly from the rules
 * (and, for dictionary-based kinds, the dictionary resource) named in
 * BreakIteratorRules_en_US_TEST, without going through the normal
 * locale-based lookup.
 *
 * Every failure is reported through errln; the method never throws for a
 * missing bundle, missing resource key, missing dictionary file or
 * dictionary read error.
 *
 * @param kind index into the bundle's "BreakIteratorClasses" array; the
 *             callers in this file pass BreakIterator.KIND_WORD, KIND_LINE
 *             and KIND_SENTENCE
 * @return the new iterator, or null if it could not be created
 */
private BreakIterator createTestIterator(int kind) {
    final String bname = "com.ibm.icu.dev.test.rbbi.BreakIteratorRules_en_US_TEST";

    // Instantiate the bundle class directly; it is deliberately not
    // reachable through the regular resource path.
    ListResourceBundle bundle = null;
    try {
        Class cls = Class.forName(bname);
        bundle = (ListResourceBundle)cls.newInstance();
    }
    catch (Exception e) {
        ///CLOVER:OFF
        errln("could not create bundle: " + bname + " exception: " + e.getMessage());
        ///CLOVER:ON
        return null;
    }

    final String[] kindNames = {
        "Character", "Word", "Line", "Sentence"
    };
    String rulesName = kindNames[kind] + "BreakRules";
    String dictionaryName = kindNames[kind] + "BreakDictionary";

    String[] classNames = bundle.getStringArray("BreakIteratorClasses");
    String rules = bundle.getString(rulesName);

    BreakIterator iter = null;
    if (classNames[kind].equals("RuleBasedBreakIterator")) {
        iter = new RuleBasedBreakIterator(rules);
    }
    else if (classNames[kind].equals("DictionaryBasedBreakIterator")) {
        try {
            String dictionaryPath = bundle.getString(dictionaryName);
            InputStream dictionary = bundle.getClass().getResourceAsStream(dictionaryPath);
            if (dictionary == null) {
                // getResourceAsStream signals "not found" with null; report it
                // here instead of handing null to the iterator constructor.
                errln("could not find dictionary " + dictionaryPath
                      + " from " + bundle.getClass());
            } else {
                logln("loading dictionary " + dictionaryPath + " from " + bundle.getClass());
                // NOTE(review): the stream is left open, as before. Close it
                // here once it is confirmed that the constructor reads the
                // whole dictionary eagerly.
                iter = new DictionaryBasedBreakIterator(rules, dictionary);
            }
        }
        catch(IOException e) {
            errln("could not read dictionary: " + e);
        }
        catch(MissingResourceException e) {
            errln("could not find dictionary name: " + e);
        }
    }

    if (iter == null) {
        errln("could not create iterator");
    }
    return iter;
}
public void testWordBreak() throws Exception {
BreakIterator wordBreak =(BreakIterator) BreakIterator.getWordInstance(new Locale("en", "US", "TEST"));
BreakIterator wordBreak = createTestIterator(BreakIterator.KIND_WORD);
int breaks = doTest(wordBreak);
logln(String.valueOf(breaks));
}
public void testLineBreak() throws Exception {
BreakIterator lineBreak = BreakIterator.getLineInstance(new Locale("en", "US", "TEST"));
BreakIterator lineBreak = createTestIterator(BreakIterator.KIND_LINE);
int breaks = doTest(lineBreak);
logln(String.valueOf(breaks));
}
public void testSentenceBreak() throws Exception {
BreakIterator sentenceBreak = BreakIterator.getSentenceInstance(new Locale("en", "US", "TEST"));
BreakIterator sentenceBreak = createTestIterator(BreakIterator.KIND_SENTENCE);
int breaks = doTest(sentenceBreak);
logln(String.valueOf(breaks));
}

View file

@ -1,41 +1,8 @@
// Standard extensions get all permissions by default
grant codeBase "file:${java.home}/lib/ext/*" {
permission java.security.AllPermission;
};
// default permissions granted to all domains
grant {
// allows anyone to listen on un-privileged ports
permission java.net.SocketPermission "localhost:1024-", "listen";
// "standard" properies that can be read by anyone
permission java.util.PropertyPermission "java.version", "read";
permission java.util.PropertyPermission "java.vendor", "read";
permission java.util.PropertyPermission "java.vendor.url", "read";
permission java.util.PropertyPermission "java.class.version", "read";
permission java.util.PropertyPermission "os.name", "read";
permission java.util.PropertyPermission "os.version", "read";
permission java.util.PropertyPermission "os.arch", "read";
permission java.util.PropertyPermission "file.separator", "read";
permission java.util.PropertyPermission "path.separator", "read";
permission java.util.PropertyPermission "line.separator", "read";
permission java.util.PropertyPermission "java.specification.version", "read";
permission java.util.PropertyPermission "java.specification.vendor", "read";
permission java.util.PropertyPermission "java.specification.name", "read";
permission java.util.PropertyPermission "java.vm.specification.version", "read";
permission java.util.PropertyPermission "java.vm.specification.vendor", "read";
permission java.util.PropertyPermission "java.vm.specification.name", "read";
permission java.util.PropertyPermission "java.vm.version", "read";
permission java.util.PropertyPermission "java.vm.vendor", "read";
permission java.util.PropertyPermission "java.vm.name", "read";
/// policies required by test framework
// policies needed to run tests
grant // codebase "file:${user.dir}/icu4jtests.jar"
{
// temporary for debugging
// permission java.lang.RuntimePermission "getProtectionDomain";
// needed for Locale.setDefault, only used in tests and demos
permission java.util.PropertyPermission "user.language", "write";
@ -43,8 +10,8 @@ grant {
// needed for TestUtils
permission java.util.PropertyPermission "ICUDataPath", "read";
permission java.util.PropertyPermission "user.dir", "read";
// yuck, why doesn't the security code normalize file paths?!?!? supply both versions...
permission java.io.FilePermission "src\\com\\ibm\\icu\\dev\\data\\-", "read";
permission java.io.FilePermission "src/com/ibm/icu/dev/data/-", "read";
// time zone tests
permission java.util.PropertyPermission "user.timezone", "read";
};

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/TestIDNA.java,v $
* $Date: 2003/12/02 03:17:15 $
* $Revision: 1.6 $
* $Date: 2004/02/06 21:53:59 $
* $Revision: 1.7 $
*
*******************************************************************************
*/
@ -21,7 +21,7 @@ import com.ibm.icu.text.StringPrepParseException;
import com.ibm.icu.text.StringPrep;
import com.ibm.icu.text.UCharacterIterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.impl.LocaleUtility;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.Utility;
/**
@ -294,7 +294,7 @@ public class TestIDNA extends TestFmwk {
}
}
public void TestNamePrepConformance() throws Exception{
InputStream stream = LocaleUtility.getImplDataResourceAsStream("uidna.spp");
InputStream stream = ICUData.getRequiredStream("data/uidna.spp");
StringPrep namePrep = new StringPrep(stream);
for(int i=0; i<TestData.conformanceTestCases.length;i++){
TestData.ConformanceTestCase testCase = TestData.conformanceTestCases[i];

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/timezone/TimeZoneTest.java,v $
* $Date: 2004/01/05 23:00:14 $
* $Revision: 1.18 $
* $Date: 2004/02/06 21:53:59 $
* $Revision: 1.19 $
*
*******************************************************************************
*/
@ -833,7 +833,9 @@ public class TimeZoneTest extends TestFmwk
final Class[] argtypes = new Class[0];
java.lang.reflect.Method m = tz_java.getClass().getMethod("getDSTSavings", argtypes);
dst_java = ((Integer) m.invoke(tz_java, args)).intValue();
} catch (Exception e) {
} catch (Exception e) {
// see JDKTimeZone for the reason for this code
dst_java = 3600000;
}
com.ibm.icu.util.TimeZone tz_icu = com.ibm.icu.util.TimeZone.getTimeZone(tzName);

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/CompoundTransliteratorTest.java,v $
* $Date: 2003/06/03 18:49:31 $
* $Revision: 1.8 $
* $Date: 2004/02/06 21:54:06 $
* $Revision: 1.9 $
*
*****************************************************************************************
*/
@ -41,7 +41,7 @@ public class CompoundTransliteratorTest extends TestFmwk {
t3=Transliterator.getInstance(names[2]);
t4=Transliterator.getInstance(names[3]);
}catch(IllegalArgumentException ex) {
errln("FAIL: Transliterator construction failed");
errln("FAIL: Transliterator construction failed" + ex.getMessage());
throw ex;
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/ICUListResourceBundleTest.java,v $
* $Date: 2003/11/21 22:20:36 $
* $Revision: 1.12 $
* $Date: 2004/02/06 21:54:05 $
* $Revision: 1.13 $
*
*******************************************************************************
*/
@ -158,6 +158,8 @@ public final class ICUListResourceBundleTest extends TestFmwk
}
return isEqual;
}
//
public void TestAliases(){
ResourceBundle rb = ICULocaleData.getResourceBundle("com.ibm.icu.dev.data","TestDataElements","testaliases");
//rb.getObject("CollationElements");

View file

@ -0,0 +1,69 @@
/*
* Created on Feb 4, 2004
*/
package com.ibm.icu.impl;
import java.io.InputStream;
import java.net.URL;
import java.security.AccessController;
import java.security.PrivilegedAction;
/**
* @author dougfelt
*
* Provides access to ICU data files as InputStreams. Implements security checking.
*/
public final class ICUData {

    /**
     * Reports whether a resource can be located through this class.
     * The name is resolved by ICUData.class.getResource, so it should be
     * either an absolute path or a path relative to this class's package
     * (most likely 'data/foo'). When a security manager is installed the
     * lookup runs inside a privileged action.
     *
     * @param resourceName the resource to look for
     * @return true if a URL for the resource was found, false otherwise
     */
    public static boolean exists(final String resourceName) {
        final URL location;
        if (System.getSecurityManager() == null) {
            location = ICUData.class.getResource(resourceName);
        } else {
            PrivilegedAction lookup = new PrivilegedAction() {
                public Object run() {
                    return ICUData.class.getResource(resourceName);
                }
            };
            location = (URL) AccessController.doPrivileged(lookup);
        }
        return location != null;
    }

    /*
     * Opens the named resource as a stream, using a privileged action when
     * a security manager is installed. If the resource cannot be found,
     * throws an InternalError when 'required' is true and returns null
     * otherwise.
     */
    private static InputStream getStream(final String resourceName, boolean required) {
        final InputStream stream;
        if (System.getSecurityManager() == null) {
            stream = ICUData.class.getResourceAsStream(resourceName);
        } else {
            PrivilegedAction open = new PrivilegedAction() {
                public Object run() {
                    return ICUData.class.getResourceAsStream(resourceName);
                }
            };
            stream = (InputStream) AccessController.doPrivileged(open);
        }

        if (stream != null || !required) {
            return stream;
        }
        throw new InternalError("could not locate data " + resourceName);
    }

    /**
     * Convenience method equivalent to getStream(resourceName, false).
     *
     * @param resourceName the resource to open
     * @return a stream on the resource, or null if it was not found
     */
    public static InputStream getStream(String resourceName) {
        return getStream(resourceName, false);
    }

    /**
     * Convenience method equivalent to getStream(resourceName, true).
     *
     * @param resourceName the resource to open
     * @return a stream on the resource, never null
     * @throws InternalError if the resource was not found
     */
    public static InputStream getRequiredStream(String resourceName) {
        return getStream(resourceName, true);
    }
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/ICUListResourceBundle.java,v $
* $Date: 2003/12/31 21:23:41 $
* $Revision: 1.18 $
* $Date: 2004/02/06 21:54:01 $
* $Revision: 1.19 $
*
*******************************************************************************
*/
@ -238,17 +238,16 @@ public class ICUListResourceBundle extends ListResourceBundle {
private byte[] expanded=null;
private String resName=null;
public ResourceBinary(String name){
resName=name;
resName="data/" + name;
}
public Object getResource(Object obj) throws Exception{
if(expanded==null){
InputStream stream = obj.getClass().getResourceAsStream(resName);
InputStream stream = ICUData.getStream(resName);
if(stream!=null){
//throw new MissingResourceException("",obj.getClass().getName(),resName);
expanded = readToEOS(stream);
return expanded;
}
}
return "";
}
@ -258,12 +257,12 @@ public class ICUListResourceBundle extends ListResourceBundle {
private char[] expanded=null;
private String resName=null;
public ResourceString(String name){
resName=name;
resName="data/"+name;
}
public Object getResource(Object obj) throws Exception{
if(expanded==null){
// Resource strings are always UTF-8
InputStream stream = obj.getClass().getResourceAsStream(resName);
InputStream stream = ICUData.getStream(resName);
if(stream!=null){
//throw new MissingResourceException("",obj.getClass().getName(),resName);

View file

@ -25,7 +25,7 @@ public class ICULocaleData {
private static Locale[] localeList;
private static final String PACKAGE1 = "com.ibm.icu.impl.data";
private static final String[] packageNames = { PACKAGE1 };
private static boolean debug = ICUDebug.enabled("localedata");
private static final boolean debug = ICUDebug.enabled("localedata");
/**
* Returns a list of the installed locales.
@ -207,10 +207,10 @@ public class ICULocaleData {
} else {
i = name.length();
}
Class cls = ICULocaleData.class.getClassLoader().loadClass(name);
if (ICUListResourceBundle.class.isAssignableFrom(cls)) {
ICUListResourceBundle bx = (ICUListResourceBundle)cls.newInstance();
if (parent != null) {
bx.setParentX(parent);
@ -385,6 +385,7 @@ public class ICULocaleData {
// ignore, keep looking
}
catch (Exception e) {
e.printStackTrace();
if (debug) {
System.out.println(e.getMessage());
}
@ -418,7 +419,7 @@ public class ICULocaleData {
return Collections.unmodifiableSet(set);
}
catch (MissingResourceException e) {
System.out.println("couldn't find index for bundleName: " + bundleName);
if (debug) System.out.println("couldn't find index for bundleName: " + bundleName);
Thread.dumpStack();
}
return Collections.EMPTY_SET;
@ -435,7 +436,7 @@ public class ICULocaleData {
return locales;
}
catch (MissingResourceException e) {
System.out.println("couldn't find index for bundleName: " + bundleName);
if (debug) System.out.println("couldn't find index for bundleName: " + bundleName);
Thread.dumpStack();
}
return new Locale[0];

View file

@ -210,8 +210,14 @@ public class JDKTimeZone extends TimeZone {
final Class[] argtypes = new Class[0];
Method m = zone.getClass().getMethod("getDSTSavings", argtypes);
return ((Integer) m.invoke(zone, args)).intValue();
} catch (Exception e) {
// should never happen
} catch (Exception e) {
// if zone is in the sun.foo class hierarchy and we
// are in a protection domain, we'll get a security
// exception. And if we claim to support DST but
// return a value of 0, java.util.SimpleTimeZone will later
// throw an IllegalArgumentException. So fake the DST
// offset as one hour (3600000 ms).
return 3600000;
}
}
return 0;

View file

@ -5,14 +5,13 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/LocaleUtility.java,v $
* $Date: 2003/08/21 23:41:25 $
* $Revision: 1.9 $
* $Date: 2004/02/06 21:54:00 $
* $Revision: 1.10 $
* *****************************************************************************************
*/
package com.ibm.icu.impl;
import java.io.InputStream;
import java.util.Locale;
/**
@ -132,10 +131,4 @@ public class LocaleUtility {
}
return new Locale(parts[0], parts[1], parts[2]);
}
public static InputStream getImplDataResourceAsStream(String name){
Class myClass = new LocaleUtility().getClass();
String fullName = "data/"+name;
return myClass.getResourceAsStream(fullName);
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/NormalizerImpl.java,v $
* $Date: 2003/11/14 00:06:08 $
* $Revision: 1.23 $
* $Date: 2004/02/06 21:54:00 $
* $Revision: 1.24 $
*******************************************************************************
*/
@ -279,12 +279,12 @@ public final class NormalizerImpl {
* Constructor
* @exception IOException thrown when data reading fails or the data is corrupted
*/
private NormalizerImpl() throws IOException{
private NormalizerImpl() throws IOException {
//data should be loaded only once
if(!isDataLoaded){
// jar access
InputStream i = getClass().getResourceAsStream(DATA_FILE_NAME);
// jar access
InputStream i = ICUData.getRequiredStream(DATA_FILE_NAME);
BufferedInputStream b = new BufferedInputStream(i,DATA_BUFFER_SIZE);
NormalizerDataReader reader = new NormalizerDataReader(b);
@ -307,7 +307,6 @@ public final class NormalizerImpl {
normTrieImpl = new NormTrieImpl();
auxTrieImpl = new AuxTrieImpl();
// load the rest of the data and initialize the data members
reader.read(normBytes, fcdBytes,auxBytes, extraData, combiningTable,
canonStartSets);
@ -333,7 +332,6 @@ public final class NormalizerImpl {
);
b.close();
i.close();
}
}

View file

@ -6,8 +6,8 @@
*
* $Source:
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $
* $Date: 2003/06/09 23:15:00 $
* $Revision: 1.6 $
* $Date: 2004/02/06 21:54:02 $
* $Revision: 1.7 $
*
*******************************************************************************
*/
@ -16,7 +16,7 @@ package com.ibm.icu.impl;
import java.io.InputStream;
import java.io.BufferedInputStream;
import java.io.IOException;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.lang.UCharacter;
@ -65,7 +65,7 @@ public final class UCharacterName
try {
INSTANCE_ = new UCharacterName();
}catch(IOException e){
throw new IllegalArgumentException("Could not construct UCharacterName. Missing unames.icu?");
throw new InternalError("Could not construct UCharacterName. Missing unames.icu?");
}
catch (Exception e) {
throw new RuntimeException(e.getMessage());
@ -1192,17 +1192,11 @@ public final class UCharacterName
*/
private UCharacterName() throws IOException
{
InputStream i = getClass().getResourceAsStream(NAME_FILE_NAME_);
if(i!=null ){
BufferedInputStream b = new BufferedInputStream(i,
NAME_BUFFER_SIZE_);
UCharacterNameReader reader = new UCharacterNameReader(b);
reader.read(this);
} else{
throw new IOException("unames.icu could not be opened. Is ICUModularBuild?");
}
i.close();
InputStream is = ICUData.getRequiredStream(NAME_FILE_NAME_);
BufferedInputStream b = new BufferedInputStream(is, NAME_BUFFER_SIZE_);
UCharacterNameReader reader = new UCharacterNameReader(b);
reader.read(this);
b.close();
}
// private methods ---------------------------------------------------

View file

@ -6,8 +6,8 @@
*
* $Source:
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterPropertyDB.java $
* $Date: 2003/12/17 04:56:04 $
* $Revision: 1.35 $
* $Date: 2004/02/06 21:54:01 $
* $Revision: 1.36 $
*
*******************************************************************************
*/
@ -1594,16 +1594,12 @@ public final class UCharacterProperty implements Trie.DataManipulate
private UCharacterProperty() throws IOException
{
// jar access
InputStream i = getClass().getResourceAsStream(DATA_FILE_NAME_);
if(i==null){
throw new IOException("Could not load the file: "+DATA_FILE_NAME_);
}
BufferedInputStream b = new BufferedInputStream(i,
DATA_BUFFER_SIZE_);
InputStream is = ICUData.getRequiredStream(DATA_FILE_NAME_);
BufferedInputStream b = new BufferedInputStream(is, DATA_BUFFER_SIZE_);
UCharacterPropertyReader reader = new UCharacterPropertyReader(b);
reader.read(this);
b.close();
i.close();
m_trie_.putIndexData(this);
}

View file

@ -11,6 +11,7 @@
package com.ibm.icu.impl;
import java.io.*;
import com.ibm.icu.lang.*;
/**
@ -118,7 +119,7 @@ public final class UPropertyAliases implements ICUBinary.Authenticate {
public UPropertyAliases() throws IOException {
// Open the .icu file from the jar/classpath
InputStream is = getClass().getResourceAsStream(DATA_FILE_NAME);
InputStream is = ICUData.getRequiredStream(DATA_FILE_NAME);
BufferedInputStream b = new BufferedInputStream(is, DATA_BUFFER_SIZE);
// Read and discard Unicode version...
/* byte unicodeVersion[] = */ICUBinary.readHeader(b, DATA_FORMAT_ID, this);

View file

@ -5,24 +5,26 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/BreakIteratorRules_th.java,v $
* $Date: 2003/07/03 17:48:12 $
* $Revision: 1.10 $
* $Date: 2004/02/06 21:54:04 $
* $Revision: 1.11 $
*
*****************************************************************************************
*/
package com.ibm.icu.impl.data;
import java.util.ListResourceBundle;
import java.net.URL;
import com.ibm.icu.impl.ICUData;
public class BreakIteratorRules_th extends ListResourceBundle {
public Object[][] getContents() {
private static final String DATA_NAME = "data/BreakDictionaryData_th.brk";
URL url = getClass().getResource("BreakDictionaryData_th.brk");
public Object[][] getContents() {
final boolean exists = ICUData.exists(DATA_NAME);
// if dictionary wasn't found, then this resource bundle doesn't have
// much to contribute...
if (url == null) {
if (!exists) {
return new Object[0][0];
}
@ -235,8 +237,8 @@ public class BreakIteratorRules_th extends ListResourceBundle {
+ "\u0e25[^$paiyannoi$_ignore_]);"
},
{ "WordBreakDictionary", url },
{ "LineBreakDictionary", url }
{ "WordBreakDictionary", DATA_NAME }, // now a path to ICU4J-specific resource
{ "LineBreakDictionary", DATA_NAME }
};
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/ResourceReader.java,v $
* $Date: 2002/08/13 23:37:48 $
* $Revision: 1.5 $
* $Date: 2004/02/06 21:54:04 $
* $Revision: 1.6 $
*
*******************************************************************************
*/
@ -14,6 +14,8 @@ package com.ibm.icu.impl.data;
import java.io.*;
import com.ibm.icu.impl.ICUData;
/**
* A reader for text resource data in the current package. The
* resource data is loaded through the class loader, so it will
@ -47,7 +49,7 @@ public class ResourceReader {
public ResourceReader(String resourceName, String encoding)
throws UnsupportedEncodingException {
this.resourceName = resourceName;
this.resourceName = "data/" + resourceName;
this.encoding = encoding;
isReset = false;
_reset();
@ -60,7 +62,7 @@ public class ResourceReader {
* package
*/
public ResourceReader(String resourceName) {
this.resourceName = resourceName;
this.resourceName = "data/" + resourceName;
this.encoding = null;
isReset = false;
try {
@ -114,10 +116,11 @@ public class ResourceReader {
if (isReset) {
return;
}
InputStream is = getClass().getResourceAsStream(resourceName);
InputStream is = ICUData.getStream(resourceName);
if (is == null) {
throw new IllegalArgumentException("Can't open " + resourceName);
}
InputStreamReader isr =
(encoding == null) ? new InputStreamReader(is) :
new InputStreamReader(is, encoding);

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacter.java,v $
* $Date: 2004/01/07 20:06:24 $
* $Revision: 1.85 $
* $Date: 2004/02/06 21:54:00 $
* $Revision: 1.86 $
*
*******************************************************************************
*/
@ -4417,6 +4417,7 @@ public final class UCharacter
}
catch (Exception e)
{
e.printStackTrace();
//throw new RuntimeException(e.getMessage());
// DO NOT throw an exception
// we might be building ICU modularly without names.icu and pnames.icu

View file

@ -1,4 +1,6 @@
Manifest-Version: 1.0
Name: com/ibm/icu
Specification-Title: Modularized ICU for Java
Specification-Version: 2.8
Specification-Vendor: ICU
@ -6,7 +8,7 @@ Implementation-Title: Modularized ICU for Java
Implementation-Version: 2.8.0
Implementation-Vendor: IBM Corporation
Implementation-Vendor-Id: com.ibm
Copyright-Info: Copyright (c) 2002-2003, International Business Machines Corporation and others. All Rights Reserved.
Copyright-Info: Copyright (c) 2000-2004, International Business Machines Corporation and others. All Rights Reserved.
Sealed: false
Name: com/ibm/icu

View file

@ -1,4 +1,6 @@
Manifest-Version: 1.0
Name: com/ibm/icu/
Specification-Title: ICU for Java
Specification-Version: 2.8
Specification-Vendor: ICU
@ -6,6 +8,5 @@ Implementation-Title: ICU for Java
Implementation-Version: 2.8.0
Implementation-Vendor: IBM Corporation
Implementation-Vendor-Id: com.ibm
Copyright-Info: Copyright (c) 2002-2003, International Business Machines Corporation and others. All Rights Reserved.
Name: com/ibm/icu
Copyright-Info: Copyright (c) 2000-2004, International Business Machines Corporation and others. All Rights Reserved.
Sealed: false

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/BreakIteratorFactory.java,v $
* $Date: 2004/01/26 23:04:28 $
* $Revision: 1.9 $
* $Date: 2004/02/06 21:54:03 $
* $Revision: 1.10 $
*
*****************************************************************************************
*/
@ -14,11 +14,11 @@ package com.ibm.icu.text;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Locale;
import java.util.MissingResourceException;
import java.util.ResourceBundle;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICULocaleData;
import com.ibm.icu.impl.ICULocaleService;
import com.ibm.icu.impl.ICUService;
@ -98,28 +98,30 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim
String rulesName,
String dictionaryName) {
BreakIterator iter = null;
ResourceBundle bundle = ICULocaleData.getResourceBundle("BreakIteratorRules", where);
String[] classNames = bundle.getStringArray("BreakIteratorClasses");
String rules = bundle.getString(rulesName);
BreakIterator iter = null;
if (classNames[kind].equals("RuleBasedBreakIterator")) {
iter = new RuleBasedBreakIterator(rules);
}
else if (classNames[kind].equals("DictionaryBasedBreakIterator")) {
try {
// System.out.println(dictionaryName);
Object t = bundle.getObject(dictionaryName);
// System.out.println(t);
URL url = (URL)t;
InputStream dictionary = url.openStream();
InputStream dictionary = ICUData.getStream(bundle.getString(dictionaryName));
// System.out.println("bundle: " + bundle + " dn: " + dictionaryName);
// Object t = bundle.getObject(dictionaryName);
// // System.out.println(t);
// URL url = (URL)t;
// System.out.println("url: " + url);
// InputStream dictionary = url.openStream();
// System.out.println("stream: " + dictionary);
iter = new DictionaryBasedBreakIterator(rules, dictionary);
}
catch(IOException e) {
System.out.println(e); // debug
}
catch(MissingResourceException e) {
System.out.println(e); // debug
}
// TODO: we don't have 'bad' resource data, so this should never happen
// in our current tests.

View file

@ -5,15 +5,14 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollationParsedRuleBuilder.java,v $
* $Date: 2004/01/28 02:05:51 $
* $Revision: 1.29 $
* $Date: 2004/02/06 21:54:03 $
* $Revision: 1.30 $
*
*******************************************************************************
*/
package com.ibm.icu.text;
import java.io.InputStream;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.text.ParseException;
import java.util.Hashtable;
import java.util.Vector;
@ -380,6 +379,12 @@ final class CollationParsedRuleBuilder
* Initializing the inverse UCA
*/
static {
InverseUCA temp = null;
try {
temp = CollatorReader.getInverseUCA();
} catch (IOException e) {
}
/*
try
{
String invdat = "/com/ibm/icu/impl/data/invuca.icu";
@ -394,13 +399,17 @@ final class CollationParsedRuleBuilder
e.printStackTrace();
throw new RuntimeException(e.getMessage());
}
if(RuleBasedCollator.UCA_ != null) {
if(!INVERSE_UCA_.m_UCA_version_.equals(RuleBasedCollator.UCA_.m_UCA_version_)) {
*/
if(temp != null && RuleBasedCollator.UCA_ != null) {
if(!temp.m_UCA_version_.equals(RuleBasedCollator.UCA_.m_UCA_version_)) {
throw new RuntimeException(INV_UCA_VERSION_MISMATCH_);
}
} else {
throw new RuntimeException(UCA_NOT_INSTANTIATED_);
}
INVERSE_UCA_ = temp;
}
// package private methods -----------------------------------------------

View file

@ -5,20 +5,26 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollatorReader.java,v $
* $Date: 2003/11/11 20:12:31 $
* $Revision: 1.16 $
* $Date: 2004/02/06 21:54:02 $
* $Revision: 1.17 $
*
*******************************************************************************
*/
package com.ibm.icu.text;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.DataInputStream;
import java.io.IOException;
import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.IntTrie;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.util.VersionInfo;
import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;
import com.ibm.icu.text.RuleBasedCollator.UCAConstants;
/**
* <p>Internal reader class for ICU data file uca.icu containing
@ -36,16 +42,59 @@ import com.ibm.icu.util.VersionInfo;
final class CollatorReader
{
/**
 * Reads the collation data stored in the ICU data resource
 * "data/ucadata.icu" into the given collator.
 * @param rbc RuleBasedCollator to populate from the data file
 * @param ucac UCAConstants object handed to the reader together with rbc
 * @return the char array produced by readImp
 * @exception IOException thrown when the data cannot be opened or read
 */
static char[] read(RuleBasedCollator rbc, UCAConstants ucac) throws IOException {
    InputStream i = ICUData.getRequiredStream("data/ucadata.icu");
    BufferedInputStream b = new BufferedInputStream(i, 90000);
    try {
        CollatorReader reader = new CollatorReader(b);
        return reader.readImp(rbc, ucac);
    } finally {
        // close in finally so the resource stream is released even when
        // header authentication or reading fails with an exception
        b.close();
    }
}
/**
 * Initializes a collator from an in-memory binary collation image.
 * An image larger than the minimum binary size is read in full; a
 * smaller one supplies only the header and options, and the rest of
 * the tables are taken from the UCA.
 * @param rbc RuleBasedCollator to populate
 * @param data binary collation image
 * @exception IOException thrown when there's a data error
 */
static void initRBC(RuleBasedCollator rbc, byte[] data) throws IOException {
    // size of UCA header + size of options, in bytes
    final int minBinaryDataSize = (42 + 25) << 2;
    // the image carries no ICU header, hence readICUHeader == false
    CollatorReader reader = new CollatorReader(
        new BufferedInputStream(new ByteArrayInputStream(data)), false);
    if (data.length <= minBinaryDataSize) {
        reader.readHeader(rbc);
        reader.readOptions(rbc);
        // duplicating UCA_'s data
        rbc.setWithUCATables();
        return;
    }
    reader.readImp(rbc, null);
}
/**
 * Loads the inverse UCA table from the ICU data resource
 * "data/invuca.icu".
 * @return the InverseUCA read from the data file
 * @exception IOException thrown when the data cannot be opened or an
 *            error occurs while reading the inverse uca
 */
static InverseUCA getInverseUCA() throws IOException {
    InputStream i = ICUData.getRequiredStream("data/invuca.icu");
    BufferedInputStream b = new BufferedInputStream(i, 110000);
    try {
        return CollatorReader.readInverseUCA(b);
    } finally {
        // closing the buffered stream also closes the wrapped resource
        // stream, so a single close covers both, including on failure
        b.close();
    }
}
// protected constructor ---------------------------------------------
/**
* <p>Protected constructor.</p>
* @param inputStream ICU callator file input stream
* @param inputStream ICU collator file input stream
* @exception IOException throw if data file fails authentication
* @draft 2.1
*/
protected CollatorReader(InputStream inputStream) throws IOException
private CollatorReader(InputStream inputStream) throws IOException
{
this(inputStream, true);
/*
byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_);
// weiv: check that we have the correct Unicode version in
// binary files
@ -55,6 +104,7 @@ final class CollatorReader
throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
}
m_dataInputStream_ = new DataInputStream(inputStream);
*/
}
/**
@ -64,7 +114,7 @@ final class CollatorReader
* @exception IOException throw if data file fails authentication
* @draft 2.1
*/
protected CollatorReader(InputStream inputStream, boolean readICUHeader)
private CollatorReader(InputStream inputStream, boolean readICUHeader)
throws IOException
{
if (readICUHeader) {
@ -89,7 +139,7 @@ final class CollatorReader
* @param rbc RuleBasedCollator to populate with header information
* @exception IOException thrown when there's a data error.
*/
protected void readHeader(RuleBasedCollator rbc) throws IOException
private void readHeader(RuleBasedCollator rbc) throws IOException
{
m_size_ = m_dataInputStream_.readInt();
// all the offsets are in bytes
@ -192,7 +242,7 @@ final class CollatorReader
* @exception IOException thrown when there's a data error.
* @draft 2.2
*/
protected void readOptions(RuleBasedCollator rbc) throws IOException
private void readOptions(RuleBasedCollator rbc) throws IOException
{
int readcount = 0;
rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();
@ -245,7 +295,7 @@ final class CollatorReader
* @exception IOException thrown when there's a data error.
* @draft 2.2
*/
protected char[] read(RuleBasedCollator rbc,
private char[] readImp(RuleBasedCollator rbc,
RuleBasedCollator.UCAConstants UCAConst)
throws IOException
{
@ -421,7 +471,7 @@ final class CollatorReader
* @exception IOException thrown when error occurs while reading the
* inverse uca
*/
protected static CollationParsedRuleBuilder.InverseUCA readInverseUCA(
private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(
InputStream inputStream)
throws IOException
{

View file

@ -4,8 +4,8 @@
* others. All Rights Reserved. *
*******************************************************************************
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/IDNA.java,v $
* $Date: 2003/12/02 01:34:32 $
* $Revision: 1.5 $
* $Date: 2004/02/06 21:54:03 $
* $Revision: 1.6 $
*
*****************************************************************************************
*/
@ -14,7 +14,7 @@ package com.ibm.icu.text;
import java.io.IOException;
import java.io.InputStream;
import com.ibm.icu.impl.LocaleUtility;
import com.ibm.icu.impl.ICUData;
/**
*
@ -88,7 +88,7 @@ public final class IDNA {
/* private constructor to prevent construction of the object */
private IDNA(){
try{
InputStream stream = LocaleUtility.getImplDataResourceAsStream("uidna.spp");
InputStream stream = ICUData.getRequiredStream("data/uidna.spp");
namePrep = new StringPrep(stream);
stream.close();
}catch (IOException e){

View file

@ -5,16 +5,13 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java,v $
* $Date: 2004/01/28 02:05:51 $
* $Revision: 1.57 $
* $Date: 2004/02/06 21:54:02 $
* $Revision: 1.58 $
*
*******************************************************************************
*/
package com.ibm.icu.text;
import java.io.InputStream;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.util.Locale;
import java.util.ResourceBundle;
import java.util.Arrays;
@ -1561,6 +1558,8 @@ public final class RuleBasedCollator extends Collator
{
UCA_ = new RuleBasedCollator();
UCA_CONSTANTS_ = new UCAConstants();
UCA_CONTRACTIONS_ = CollatorReader.read(UCA_, UCA_CONSTANTS_);
/*
InputStream i = UCA_.getClass().getResourceAsStream(
"/com/ibm/icu/impl/data/ucadata.icu");
@ -1569,6 +1568,7 @@ public final class RuleBasedCollator extends Collator
UCA_CONTRACTIONS_ = reader.read(UCA_, UCA_CONSTANTS_);
b.close();
i.close();
*/
// called before doing canonical closure for the UCA.
impCEGen_ = new ImplicitCEGenerator(UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_, UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_);
// IMPLICIT_BASE_BYTE_ = UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_;
@ -1641,10 +1641,13 @@ public final class RuleBasedCollator extends Collator
if(rules[0][1] instanceof byte[]){
m_rules_ = (String)rules[1][1];
byte map[] = (byte [])rules[0][1];
CollatorReader.initRBC(this, map);
/*
BufferedInputStream input =
new BufferedInputStream(
new ByteArrayInputStream(map));
CollatorReader reader = new CollatorReader(input, false);
/*
CollatorReader reader = new CollatorReader(input, false);
if (map.length > MIN_BINARY_DATA_SIZE_) {
reader.read(this, null);
}
@ -1654,6 +1657,7 @@ public final class RuleBasedCollator extends Collator
// duplicating UCA_'s data
setWithUCATables();
}
*/
// at this point, we have read in the collator
// now we need to check whether the binary image has
// the right UCA and other versions
@ -1945,7 +1949,7 @@ public final class RuleBasedCollator extends Collator
* Minimum size required for the binary collation data in bytes.
* Size of UCA header + size of options to 4 bytes
*/
private static final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;
//private static final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;
/**
* If this collator is to generate only simple tertiaries for fast path