mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-3652 make icu4j run under security manager
X-SVN-Rev: 14467
This commit is contained in:
parent
aa012dfd7c
commit
873f4f09fd
32 changed files with 940 additions and 675 deletions
|
@ -6,8 +6,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/build.xml,v $
|
||||
* $Date: 2004/01/30 19:17:51 $
|
||||
* $Revision: 1.87 $
|
||||
* $Date: 2004/02/06 21:54:06 $
|
||||
* $Revision: 1.88 $
|
||||
*
|
||||
*******************************************************************************
|
||||
* This is the ant build file for ICU4J. See readme.html for more information.
|
||||
|
@ -86,6 +86,7 @@
|
|||
<property name="jarDocs.file" value="icu4jdocs.jar"/>
|
||||
<property name="icu4j.manifest" value="${src.dir}/com/ibm/icu/manifest.stub"/>
|
||||
<property name="icu4j.module.manifest" value="${src.dir}/com/ibm/icu/manifest.module.stub"/>
|
||||
<property name="icu4j.tests.manifest" value="${src.dir}/com/ibm/icu/dev/test/manifest.test.stub"/>
|
||||
<property name="zip.file" value="../icu4j${DSTAMP}.zip"/>
|
||||
<property name="zipSrc.file" value="../icu4jSrc${DSTAMP}.zip"/>
|
||||
|
||||
|
@ -239,11 +240,12 @@
|
|||
manifest="${icu4j.manifest}"/>
|
||||
</target>
|
||||
|
||||
<target name="testJar" depends="tests">
|
||||
<jar jarfile="${testjar.file}"
|
||||
compress="true"
|
||||
includes="com/ibm/icu/dev/test/**/*"
|
||||
basedir="${build.dir}"/>
|
||||
<target name="testJar" depends="tests,jar">
|
||||
<jar jarfile="${testjar.file}" compress="true" manifest="${icu4j.tests.manifest}">
|
||||
<fileset dir="${build.dir}" includes="com/ibm/icu/dev/test/**/*"/>
|
||||
<fileset dir="${build.dir}" includes="com/ibm/icu/dev/data/TestDataElements*.class"/>
|
||||
<fileset dir="${src.dir}" includes="com/ibm/icu/dev/data/**/*/" excludes="**/*.java"/> <!-- too broad, but easy -->
|
||||
</jar>
|
||||
</target>
|
||||
|
||||
<target name="jarSrc" depends="init">
|
||||
|
@ -310,24 +312,24 @@
|
|||
|
||||
<target name="check" depends="tests">
|
||||
<java classname="com.ibm.icu.dev.test.TestAll" fork="yes">
|
||||
<arg value="-w"/>
|
||||
<classpath>
|
||||
<pathelement path="${java.class.path}/"/>
|
||||
<pathelement location="clover.jar"/>
|
||||
<pathelement path="${build.dir}"/>
|
||||
</classpath>
|
||||
</classpath>
|
||||
</java>
|
||||
</target>
|
||||
|
||||
<target name="secureCheck" depends="tests">
|
||||
<target name="secureCheck" depends="testJar">
|
||||
<java classname="com.ibm.icu.dev.test.TestAll" fork="yes">
|
||||
<!-- (use for debugging, LOTS of output) jvmarg value="-Djava.security.debug=access" -->
|
||||
<jvmarg value="-Djava.security.manager"/>
|
||||
<jvmarg value="-Djava.security.policy=src/com/ibm/icu/dev/test/security.policy"/>
|
||||
<arg value="-w"/>
|
||||
<jvmarg value="-Djava.security.policy=${src.dir}/com/ibm/icu/dev/test/security.policy"/>
|
||||
<arg value="-w"/>
|
||||
<classpath>
|
||||
<pathelement path="${java.class.path}/"/>
|
||||
<pathelement location="clover.jar"/>
|
||||
<pathelement path="${build.dir}"/>
|
||||
<pathelement location="${testjar.file}"/>
|
||||
</classpath>
|
||||
</java>
|
||||
</target>
|
||||
|
|
|
@ -5,15 +5,16 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/demo/rbbi/BreakIteratorRules_en_US_DEMO.java,v $
|
||||
* $Date: 2002/02/19 04:10:23 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2004/02/06 21:54:04 $
|
||||
* $Revision: 1.9 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.demo.rbbi;
|
||||
|
||||
import java.util.ListResourceBundle;
|
||||
import java.net.URL;
|
||||
|
||||
import com.ibm.icu.impl.ICUData;
|
||||
|
||||
/**
|
||||
* This resource bundle is included for testing and demonstration purposes only.
|
||||
|
@ -22,202 +23,209 @@ import java.net.URL;
|
|||
* with good resource data (and a good dictionary file) for Thai
|
||||
*/
|
||||
public class BreakIteratorRules_en_US_DEMO extends ListResourceBundle {
|
||||
|
||||
private static final URL url =
|
||||
BreakIteratorRules_en_US_DEMO.class.getResource("/com/ibm/data/misc/english.dict");
|
||||
|
||||
public Object[][] getContents() {
|
||||
return contents;
|
||||
}
|
||||
private static final String DATA_NAME = "/com/ibm/data/misc/english.dict";
|
||||
|
||||
static final Object[][] contents = {
|
||||
// names of classes to instantiate for the different kinds of break
|
||||
// iterator. Notice we're now using DictionaryBasedBreakIterator
|
||||
// for word and line breaking.
|
||||
{ "BreakIteratorClasses",
|
||||
new String[] { "RuleBasedBreakIterator", // character-break iterator class
|
||||
"DictionaryBasedBreakIterator", // word-break iterator class
|
||||
"DictionaryBasedBreakIterator", // line-break iterator class
|
||||
"RuleBasedBreakIterator" } // sentence-break iterator class
|
||||
},
|
||||
|
||||
// These are the same word-breaking rules as are specified in the default
|
||||
// resource, except that the Latin letters, apostrophe, and hyphen are
|
||||
// specified as dictionary characters
|
||||
{ "WordBreakRules",
|
||||
// ignore non-spacing marks, enclosing marks, and format characters,
|
||||
// all of which should not influence the algorithm
|
||||
"$_ignore_=[[:Mn:][:Me:][:Cf:]];"
|
||||
public Object[][] getContents() {
|
||||
final boolean exists = ICUData.exists(DATA_NAME);
|
||||
|
||||
// lower and upper case Roman letters, apostrophe and dash are
|
||||
// in the English dictionary
|
||||
+ "$_dictionary_=[a-zA-Z\\'\\-];"
|
||||
if (!exists) {
|
||||
return new Object[0][0];
|
||||
}
|
||||
|
||||
// Hindi phrase separator, kanji, katakana, hiragana, CJK diacriticals,
|
||||
// other letters, and digits
|
||||
+ "$danda=[\u0964\u0965];"
|
||||
+ "$kanji=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];"
|
||||
+ "$kata=[\u3099-\u309c\u30a1-\u30fe];"
|
||||
+ "$hira=[\u3041-\u309e\u30fc];"
|
||||
+ "$let=[[[:L:][:Mc:]]-[$kanji$kata$hira]];"
|
||||
+ "$dgt=[:N:];"
|
||||
return new Object[][] {
|
||||
// names of classes to instantiate for the different kinds of break
|
||||
// iterator. Notice we're now using DictionaryBasedBreakIterator
|
||||
// for word and line breaking.
|
||||
{ "BreakIteratorClasses",
|
||||
new String[] {
|
||||
"RuleBasedBreakIterator",
|
||||
// character-break iterator class
|
||||
"DictionaryBasedBreakIterator",
|
||||
// word-break iterator class
|
||||
"DictionaryBasedBreakIterator",
|
||||
// line-break iterator class
|
||||
"RuleBasedBreakIterator" } // sentence-break iterator class
|
||||
},
|
||||
|
||||
// punctuation that can occur in the middle of a word: currently
|
||||
// dashes, apostrophes, and quotation marks
|
||||
+ "$mid_word=[[:Pd:]\u00ad\u2027\\\"\\\'];"
|
||||
// These are the same word-breaking rules as are specified in the default
|
||||
// resource, except that the Latin letters, apostrophe, and hyphen are
|
||||
// specified as dictionary characters
|
||||
{
|
||||
"WordBreakRules",
|
||||
// ignore non-spacing marks, enclosing marks, and format characters,
|
||||
// all of which should not influence the algorithm
|
||||
"$_ignore_=[[:Mn:][:Me:][:Cf:]];"
|
||||
|
||||
// punctuation that can occur in the middle of a number: currently
|
||||
// apostrophes, quotation marks, periods, commas, and the Arabic
|
||||
// decimal point
|
||||
+ "$mid_num=[\\\"\\\'\\,\u066b\\.];"
|
||||
// lower and upper case Roman letters, apostrophe and dash are
|
||||
// in the English dictionary
|
||||
+"$_dictionary_=[a-zA-Z\\'\\-];"
|
||||
|
||||
// punctuation that can occur at the beginning of a number: currently
|
||||
// the period, the number sign, and all currency symbols except the cents sign
|
||||
+ "$pre_num=[[[:Sc:]-[\u00a2]]\\#\\.];"
|
||||
// Hindi phrase separator, kanji, katakana, hiragana, CJK diacriticals,
|
||||
// other letters, and digits
|
||||
+"$danda=[\u0964\u0965];"
|
||||
+ "$kanji=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];"
|
||||
+ "$kata=[\u3099-\u309c\u30a1-\u30fe];"
|
||||
+ "$hira=[\u3041-\u309e\u30fc];"
|
||||
+ "$let=[[[:L:][:Mc:]]-[$kanji$kata$hira]];"
|
||||
+ "$dgt=[:N:];"
|
||||
|
||||
// punctuation that can occur at the end of a number: currently
|
||||
// the percent, per-thousand, per-ten-thousand, and Arabic percent
|
||||
// signs, the cents sign, and the ampersand
|
||||
+ "$post_num=[\\%\\&\u00a2\u066a\u2030\u2031];"
|
||||
// punctuation that can occur in the middle of a word: currently
|
||||
// dashes, apostrophes, and quotation marks
|
||||
+"$mid_word=[[:Pd:]\u00ad\u2027\\\"\\\'];"
|
||||
|
||||
// line separators: currently LF, FF, PS, and LS
|
||||
+ "$ls=[\n\u000c\u2028\u2029];"
|
||||
// punctuation that can occur in the middle of a number: currently
|
||||
// apostrophes, quotation marks, periods, commas, and the Arabic
|
||||
// decimal point
|
||||
+"$mid_num=[\\\"\\\'\\,\u066b\\.];"
|
||||
|
||||
// whitespace: all space separators and the tab character
|
||||
+ "$ws=[[:Zs:]\t];"
|
||||
// punctuation that can occur at the beginning of a number: currently
|
||||
// the period, the number sign, and all currency symbols except the cents sign
|
||||
+"$pre_num=[[[:Sc:]-[\u00a2]]\\#\\.];"
|
||||
|
||||
// a word is a sequence of letters that may contain internal
|
||||
// punctuation, as long as it begins and ends with a letter and
|
||||
// never contains two punctuation marks in a row
|
||||
+ "$word=($let+($mid_word$let+)*$danda?);"
|
||||
// punctuation that can occur at the end of a number: currently
|
||||
// the percent, per-thousand, per-ten-thousand, and Arabic percent
|
||||
// signs, the cents sign, and the ampersand
|
||||
+"$post_num=[\\%\\&\u00a2\u066a\u2030\u2031];"
|
||||
|
||||
// a number is a sequence of digits that may contain internal
|
||||
// punctuation, as long as it begins and ends with a digit and
|
||||
// never contains two punctuation marks in a row.
|
||||
+ "$number=($dgt+($mid_num$dgt+)*);"
|
||||
// line separators: currently LF, FF, PS, and LS
|
||||
+"$ls=[\n\u000c\u2028\u2029];"
|
||||
|
||||
// break after every character, with the following exceptions
|
||||
// (this will cause punctuation marks that aren't considered
|
||||
// part of words or numbers to be treated as words unto themselves)
|
||||
+ ".;"
|
||||
// whitespace: all space separators and the tab character
|
||||
+"$ws=[[:Zs:]\t];"
|
||||
|
||||
// keep together any sequence of contiguous words and numbers
|
||||
// (including just one of either), plus an optional trailing
|
||||
// number-suffix character
|
||||
+ "$word?($number$word)*($number$post_num?)?;"
|
||||
// a word is a sequence of letters that may contain internal
|
||||
// punctuation, as long as it begins and ends with a letter and
|
||||
// never contains two punctuation marks in a row
|
||||
+"$word=($let+($mid_word$let+)*$danda?);"
|
||||
|
||||
// keep together any sequence of contiguous words and numbers
|
||||
// that starts with a number-prefix character and a number,
|
||||
// and may end with a number-suffix character
|
||||
+ "$pre_num($number$word)*($number$post_num?)?;"
|
||||
// a number is a sequence of digits that may contain internal
|
||||
// punctuation, as long as it begins and ends with a digit and
|
||||
// never contains two punctuation marks in a row.
|
||||
+"$number=($dgt+($mid_num$dgt+)*);"
|
||||
|
||||
// keep together runs of whitespace (optionally with a single trailing
|
||||
// line separator or CRLF sequence)
|
||||
+ "$ws*\r?$ls?;"
|
||||
// break after every character, with the following exceptions
|
||||
// (this will cause punctuation marks that aren't considered
|
||||
// part of words or numbers to be treated as words unto themselves)
|
||||
+".;"
|
||||
|
||||
// keep together runs of Katakana
|
||||
+ "$kata*;"
|
||||
// keep together any sequence of contiguous words and numbers
|
||||
// (including just one of either), plus an optional trailing
|
||||
// number-suffix character
|
||||
+"$word?($number$word)*($number$post_num?)?;"
|
||||
|
||||
// keep together runs of Hiragana
|
||||
+ "$hira*;"
|
||||
// keep together any sequence of contiguous words and numbers
|
||||
// that starts with a number-prefix character and a number,
|
||||
// and may end with a number-suffix character
|
||||
+"$pre_num($number$word)*($number$post_num?)?;"
|
||||
|
||||
// keep together runs of Kanji
|
||||
+ "$kanji*;"},
|
||||
|
||||
// These are the same line-breaking rules as are specified in the default
|
||||
// resource, except that the Latin letters, apostrophe, and hyphen are
|
||||
// specified as dictionary characters
|
||||
{ "LineBreakRules",
|
||||
// ignore non-spacing marks, enclosing marks, and format characters
|
||||
"$_ignore_=[[:Mn:][:Me:][:Cf:]];"
|
||||
// keep together runs of whitespace (optionally with a single trailing
|
||||
// line separator or CRLF sequence)
|
||||
+"$ws*\r?$ls?;"
|
||||
|
||||
// lower and upper case Roman letters, apostrophe and dash
|
||||
// are in the English dictionary
|
||||
+ "$_dictionary_=[a-zA-Z\\'\\-];"
|
||||
// keep together runs of Katakana
|
||||
+"$kata*;"
|
||||
|
||||
// Hindi phrase separators
|
||||
+ "$danda=[\u0964\u0965];"
|
||||
// keep together runs of Hiragana
|
||||
+"$hira*;"
|
||||
|
||||
// characters that always cause a break: ETX, tab, LF, FF, LS, and PS
|
||||
+ "$break=[\u0003\t\n\f\u2028\u2029];"
|
||||
// keep together runs of Kanji
|
||||
+"$kanji*;" },
|
||||
|
||||
// characters that always prevent a break: the non-breaking space
|
||||
// and similar characters
|
||||
+ "$nbsp=[\u00a0\u2007\u2011\ufeff];"
|
||||
// These are the same line-breaking rules as are specified in the default
|
||||
// resource, except that the Latin letters, apostrophe, and hyphen are
|
||||
// specified as dictionary characters
|
||||
{ "LineBreakRules",
|
||||
// ignore non-spacing marks, enclosing marks, and format characters
|
||||
"$_ignore_=[[:Mn:][:Me:][:Cf:]];"
|
||||
|
||||
// whitespace: space separators and control characters, except for
|
||||
// CR and the other characters mentioned above
|
||||
+ "$space=[[[:Zs:][:Cc:]]-[$nbsp$break\r]];"
|
||||
// lower and upper case Roman letters, apostrophe and dash
|
||||
// are in the English dictionary
|
||||
+"$_dictionary_=[a-zA-Z\\'\\-];"
|
||||
|
||||
// dashes: dash punctuation and the discretionary hyphen, except for
|
||||
// non-breaking hyphens
|
||||
+ "$dash=[[[:Pd:]\u00ad]-[$nbsp]];"
|
||||
// Hindi phrase separators
|
||||
+"$danda=[\u0964\u0965];"
|
||||
|
||||
// characters that stick to a word if they precede it: currency symbols
|
||||
// (except the cents sign) and starting punctuation
|
||||
+ "$pre_word=[[[:Sc:]-[\u00a2]][:Ps:]\\\"\\\'];"
|
||||
// characters that always cause a break: ETX, tab, LF, FF, LS, and PS
|
||||
+"$break=[\u0003\t\n\f\u2028\u2029];"
|
||||
|
||||
// characters that stick to a word if they follow it: ending punctuation,
|
||||
// other punctuation that usually occurs at the end of a sentence,
|
||||
// small Kana characters, some CJK diacritics, etc.
|
||||
+ "$post_word=[[:Pe:]\\!\\\"\\\'\\%\\.\\,\\:\\;\\?\u00a2\u00b0\u066a\u2030-\u2034"
|
||||
+ "\u2103\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063"
|
||||
+ "\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9"
|
||||
+ "\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0c"
|
||||
+ "\uff0e\uff1f];"
|
||||
// characters that always prevent a break: the non-breaking space
|
||||
// and similar characters
|
||||
+"$nbsp=[\u00a0\u2007\u2011\ufeff];"
|
||||
|
||||
// Kanji: actually includes both Kanji and Kana, except for small Kana and
|
||||
// CJK diacritics
|
||||
+ "$kanji=[[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa]-[$post_word$_ignore_]];"
|
||||
// whitespace: space separators and control characters, except for
|
||||
// CR and the other characters mentioned above
|
||||
+"$space=[[[:Zs:][:Cc:]]-[$nbsp$break\r]];"
|
||||
|
||||
// digits
|
||||
+ "$digit=[[:Nd:][:No:]];"
|
||||
// dashes: dash punctuation and the discretionary hyphen, except for
|
||||
// non-breaking hyphens
|
||||
+"$dash=[[[:Pd:]\u00ad]-[$nbsp]];"
|
||||
|
||||
// punctuation that can occur in the middle of a number: periods and commas
|
||||
+ "$mid_num=[\\.\\,];"
|
||||
// characters that stick to a word if they precede it: currency symbols
|
||||
// (except the cents sign) and starting punctuation
|
||||
+"$pre_word=[[[:Sc:]-[\u00a2]][:Ps:]\\\"\\\'];"
|
||||
|
||||
// everything not mentioned above, plus the quote marks (which are both
|
||||
// <pre-word>, <post-word>, and <char>)
|
||||
+ "$char=[^$break$space$dash$kanji$nbsp$_ignore_$pre_word$post_word$mid_num$danda\r\\\"\\\'];"
|
||||
// characters that stick to a word if they follow it: ending punctuation,
|
||||
// other punctuation that usually occurs at the end of a sentence,
|
||||
// small Kana characters, some CJK diacritics, etc.
|
||||
+"$post_word=[[:Pe:]\\!\\\"\\\'\\%\\.\\,\\:\\;\\?\u00a2\u00b0\u066a\u2030-\u2034"
|
||||
+ "\u2103\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063"
|
||||
+ "\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9"
|
||||
+ "\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0c"
|
||||
+ "\uff0e\uff1f];"
|
||||
|
||||
// a "number" is a run of prefix characters and dashes, followed by one or
|
||||
// more digits with isolated number-punctuation characters interspersed
|
||||
+ "$number=([$pre_word$dash]*$digit+($mid_num$digit+)*);"
|
||||
// Kanji: actually includes both Kanji and Kana, except for small Kana and
|
||||
// CJK diacritics
|
||||
+"$kanji=[[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa]-[$post_word$_ignore_]];"
|
||||
|
||||
// the basic core of a word can be either a "number" as defined above, a single
|
||||
// "Kanji" character, or a run of any number of not-explicitly-mentioned
|
||||
// characters (this includes Latin letters)
|
||||
+ "$word_core=([$pre_word$char]*|$kanji|$number);"
|
||||
// digits
|
||||
+"$digit=[[:Nd:][:No:]];"
|
||||
|
||||
// a word may end with an optional suffix that can be either a run of one or
|
||||
// more dashes or a run of word-suffix characters, followed by an optional
|
||||
// run of whitespace
|
||||
+ "$word_suffix=(($dash+|$post_word*)$space*);"
|
||||
// punctuation that can occur in the middle of a number: periods and commas
|
||||
+"$mid_num=[\\.\\,];"
|
||||
|
||||
// a word, thus, is an optional run of word-prefix characters, followed by
|
||||
// a word core and a word suffix (the syntax of <word-core> and <word-suffix>
|
||||
// actually allows either of them to match the empty string, putting a break
|
||||
// between things like ")(" or "aaa(aaa"
|
||||
+ "$word=($pre_word*$word_core$word_suffix);"
|
||||
// everything not mentioned above, plus the quote marks (which are both
|
||||
// <pre-word>, <post-word>, and <char>)
|
||||
+"$char=[^$break$space$dash$kanji$nbsp$_ignore_$pre_word$post_word$mid_num$danda\r\\\"\\\'];"
|
||||
|
||||
// finally, the rule that does the work: Keep together any run of words that
|
||||
// are joined by runs of one or more non-breaking characters ($nbsp). Also keep a trailing
|
||||
// line-break character or CRLF combination with the word. (line separators
|
||||
// "win" over nbsp's)
|
||||
+ "$word($nbsp+$word)*\r?$break?;" },
|
||||
|
||||
// these two resources specify the pathnames of the dictionary files to
|
||||
// use for word breaking and line breaking. Both currently refer to
|
||||
// a file called english.dict placed in com.ibm.icu.impl.data
|
||||
// somewhere in the class path. It's important to note that
|
||||
// english.dict was created for testing purposes only, and doesn't
|
||||
// come anywhere close to being an exhaustive dictionary of English
|
||||
// words (basically, it contains all the words in the Declaration of
|
||||
// Independence, and the Revised Standard Version of the book of Genesis,
|
||||
// plus a few other words thrown in to show more interesting cases).
|
||||
// { "WordBreakDictionary", "com\\ibm\\text\\resources\\english.dict" },
|
||||
// { "LineBreakDictionary", "com\\ibm\\text\\resources\\english.dict" }
|
||||
{ "WordBreakDictionary", url },
|
||||
{ "LineBreakDictionary", url }
|
||||
};
|
||||
// a "number" is a run of prefix characters and dashes, followed by one or
|
||||
// more digits with isolated number-punctuation characters interspersed
|
||||
+"$number=([$pre_word$dash]*$digit+($mid_num$digit+)*);"
|
||||
|
||||
// the basic core of a word can be either a "number" as defined above, a single
|
||||
// "Kanji" character, or a run of any number of not-explicitly-mentioned
|
||||
// characters (this includes Latin letters)
|
||||
+"$word_core=([$pre_word$char]*|$kanji|$number);"
|
||||
|
||||
// a word may end with an optional suffix that can be either a run of one or
|
||||
// more dashes or a run of word-suffix characters, followed by an optional
|
||||
// run of whitespace
|
||||
+"$word_suffix=(($dash+|$post_word*)$space*);"
|
||||
|
||||
// a word, thus, is an optional run of word-prefix characters, followed by
|
||||
// a word core and a word suffix (the syntax of <word-core> and <word-suffix>
|
||||
// actually allows either of them to match the empty string, putting a break
|
||||
// between things like ")(" or "aaa(aaa"
|
||||
+"$word=($pre_word*$word_core$word_suffix);"
|
||||
|
||||
// finally, the rule that does the work: Keep together any run of words that
|
||||
// are joined by runs of one or more non-breaking characters ($nbsp). Also keep a trailing
|
||||
// line-break character or CRLF combination with the word. (line separators
|
||||
// "win" over nbsp's)
|
||||
+"$word($nbsp+$word)*\r?$break?;" },
|
||||
|
||||
// these two resources specify the pathnames of the dictionary files to
|
||||
// use for word breaking and line breaking. Both currently refer to
|
||||
// a file called english.dict placed in com.ibm.icu.impl.data
|
||||
// somewhere in the class path. It's important to note that
|
||||
// english.dict was created for testing purposes only, and doesn't
|
||||
// come anywhere close to being an exhaustive dictionary of English
|
||||
// words (basically, it contains all the words in the Declaration of
|
||||
// Independence, and the Revised Standard Version of the book of Genesis,
|
||||
// plus a few other words thrown in to show more interesting cases).
|
||||
// { "WordBreakDictionary", "com\\ibm\\text\\resources\\english.dict" },
|
||||
// { "LineBreakDictionary", "com\\ibm\\text\\resources\\english.dict" }
|
||||
{ "WordBreakDictionary", DATA_NAME },
|
||||
{ "LineBreakDictionary", DATA_NAME }
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/TestFmwk.java,v $
|
||||
* $Date: 2004/01/27 23:13:13 $
|
||||
* $Revision: 1.59 $
|
||||
* $Date: 2004/02/06 21:54:04 $
|
||||
* $Revision: 1.60 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -224,7 +224,6 @@ public class TestFmwk extends AbstractTestLog {
|
|||
if (name.indexOf('.') == -1) {
|
||||
name = defaultPackage + name;
|
||||
}
|
||||
|
||||
try {
|
||||
Class cls = Class.forName(name);
|
||||
if (!TestFmwk.class.isAssignableFrom(cls)) {
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/TestUtil.java,v $
|
||||
* $Date: 2003/11/24 22:13:51 $
|
||||
* $Revision: 1.10 $
|
||||
* $Date: 2004/02/06 21:54:05 $
|
||||
* $Revision: 1.11 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -18,172 +18,221 @@ import java.io.IOException;
|
|||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
|
||||
public class TestUtil {
|
||||
/**
|
||||
* Standard path to the test data.
|
||||
*/
|
||||
public static final String DATA_PATH = "/src/com/ibm/icu/dev/data/";
|
||||
public final class TestUtil {
|
||||
/**
|
||||
* Path to test data in icu4jtest.jar
|
||||
*/
|
||||
public static final String LOCAL_DATA_PATH = "/com/ibm/icu/dev/data/";
|
||||
|
||||
/**
|
||||
* Property for user-defined data path.
|
||||
*/
|
||||
public static final String DATA_PATH_PROPERTY = "ICUDataPath";
|
||||
/**
|
||||
* Standard path to the test data in the file system.
|
||||
*/
|
||||
public static final String DATA_PATH = "/src" + LOCAL_DATA_PATH;
|
||||
|
||||
/**
|
||||
* Property for user-defined data path.
|
||||
*/
|
||||
public static final String DATA_PATH_PROPERTY = "ICUDataPath";
|
||||
|
||||
/**
|
||||
* Property for modular build.
|
||||
*/
|
||||
public static final String DATA_MODULAR_BUILD_PROPERTY = "ICUModularBuild";
|
||||
/**
|
||||
* Property for modular build.
|
||||
*/
|
||||
public static final String DATA_MODULAR_BUILD_PROPERTY = "ICUModularBuild";
|
||||
|
||||
/**
|
||||
* Compute a full data path using the ICUDataPath, if defined, or the user.dir, if we
|
||||
* are allowed access to it.
|
||||
*/
|
||||
private static final String dataPath(String fileName) {
|
||||
String s = System.getProperty(DATA_PATH_PROPERTY);
|
||||
if (s == null) {
|
||||
// assume user.dir is directly above src directory
|
||||
// data path must end in '/' or '\', fileName should not start with one
|
||||
s = System.getProperty("user.dir"); // protected property
|
||||
s = s + DATA_PATH;
|
||||
}
|
||||
return s + fileName;
|
||||
}
|
||||
/**
|
||||
* Compute a full data path using the ICUDataPath, if defined, or the user.dir, if we
|
||||
* are allowed access to it.
|
||||
*/
|
||||
private static final String dataPath(String fileName) {
|
||||
String s = System.getProperty(DATA_PATH_PROPERTY);
|
||||
if (s == null) {
|
||||
// assume user.dir is directly above src directory
|
||||
// data path must end in '/' or '\', fileName should not start with one
|
||||
s = System.getProperty("user.dir"); // protected property
|
||||
s = s + DATA_PATH;
|
||||
}
|
||||
return s + fileName;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a buffered reader on the data file at path 'name' rooted at the data path.
|
||||
*/
|
||||
public static final BufferedReader getDataReader(String name) throws IOException {
|
||||
InputStream is = new FileInputStream(dataPath(name));
|
||||
InputStreamReader isr = new InputStreamReader(is);
|
||||
return new BufferedReader(isr);
|
||||
}
|
||||
/**
|
||||
* Return an input stream on the data file at path 'name' rooted at the data path
|
||||
*/
|
||||
public static final InputStream getDataStream(String name) throws IOException {
|
||||
InputStream is = null;
|
||||
try {
|
||||
is = new FileInputStream(dataPath(name));
|
||||
} catch (Throwable e) {
|
||||
try {
|
||||
is = TestUtil.class.getResourceAsStream(LOCAL_DATA_PATH + name);
|
||||
} catch (Throwable t) {
|
||||
IOException ex =
|
||||
new IOException("data resource '" + name + "' not found");
|
||||
ex.initCause(t);
|
||||
throw ex;
|
||||
}
|
||||
}
|
||||
return is;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a buffered reader on the data file at path 'name' rooted at the data path,
|
||||
* using the provided encoding.
|
||||
*/
|
||||
public static final BufferedReader getDataReader(String name, String charset) throws IOException {
|
||||
InputStream is = new FileInputStream(dataPath(name));
|
||||
InputStreamReader isr = new InputStreamReader(is, charset);
|
||||
return new BufferedReader(isr);
|
||||
}
|
||||
/**
|
||||
* Return a buffered reader on the data file at path 'name' rooted at the data path.
|
||||
*/
|
||||
public static final BufferedReader getDataReader(String name, String charset) throws IOException {
|
||||
InputStream is = getDataStream(name);;
|
||||
InputStreamReader isr =
|
||||
charset == null
|
||||
? new InputStreamReader(is)
|
||||
: new InputStreamReader(is, charset);
|
||||
return new BufferedReader(isr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an input stream on the data file at path 'name' rooted at the data path
|
||||
*/
|
||||
public static final InputStream getDataStream(String name) throws IOException{
|
||||
return new FileInputStream(dataPath(name));
|
||||
}
|
||||
/**
|
||||
* Return a buffered reader on the data file at path 'name' rooted at the data path,
|
||||
* using the platform's default encoding.
|
||||
*/
|
||||
public static final BufferedReader getDataReader(String name)
|
||||
throws IOException {
|
||||
return getDataReader(name, null);
|
||||
}
|
||||
|
||||
static final char DIGITS[] = {
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
|
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
|
||||
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
|
||||
'U', 'V', 'W', 'X', 'Y', 'Z'
|
||||
};
|
||||
/**
|
||||
* Return true if the character is NOT printable ASCII. The tab,
|
||||
* newline and linefeed characters are considered unprintable.
|
||||
*/
|
||||
public static boolean isUnprintable(int c) {
|
||||
return !(c >= 0x20 && c <= 0x7E);
|
||||
}
|
||||
/**
|
||||
* Escape unprintable characters using <backslash>uxxxx notation
|
||||
* for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and
|
||||
* above. If the character is printable ASCII, then do nothing
|
||||
* and return FALSE. Otherwise, append the escaped notation and
|
||||
* return TRUE.
|
||||
*/
|
||||
public static boolean escapeUnprintable(StringBuffer result, int c) {
|
||||
if (isUnprintable(c)) {
|
||||
result.append('\\');
|
||||
if ((c & ~0xFFFF) != 0) {
|
||||
result.append('U');
|
||||
result.append(DIGITS[0xF&(c>>28)]);
|
||||
result.append(DIGITS[0xF&(c>>24)]);
|
||||
result.append(DIGITS[0xF&(c>>20)]);
|
||||
result.append(DIGITS[0xF&(c>>16)]);
|
||||
} else {
|
||||
result.append('u');
|
||||
}
|
||||
result.append(DIGITS[0xF&(c>>12)]);
|
||||
result.append(DIGITS[0xF&(c>>8)]);
|
||||
result.append(DIGITS[0xF&(c>>4)]);
|
||||
result.append(DIGITS[0xF&c]);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
static final char DIGITS[] =
|
||||
{
|
||||
'0',
|
||||
'1',
|
||||
'2',
|
||||
'3',
|
||||
'4',
|
||||
'5',
|
||||
'6',
|
||||
'7',
|
||||
'8',
|
||||
'9',
|
||||
'A',
|
||||
'B',
|
||||
'C',
|
||||
'D',
|
||||
'E',
|
||||
'F',
|
||||
'G',
|
||||
'H',
|
||||
'I',
|
||||
'J',
|
||||
'K',
|
||||
'L',
|
||||
'M',
|
||||
'N',
|
||||
'O',
|
||||
'P',
|
||||
'Q',
|
||||
'R',
|
||||
'S',
|
||||
'T',
|
||||
'U',
|
||||
'V',
|
||||
'W',
|
||||
'X',
|
||||
'Y',
|
||||
'Z' };
|
||||
/**
|
||||
* Return true if the character is NOT printable ASCII. The tab,
|
||||
* newline and linefeed characters are considered unprintable.
|
||||
*/
|
||||
public static boolean isUnprintable(int c) {
|
||||
return !(c >= 0x20 && c <= 0x7E);
|
||||
}
|
||||
/**
|
||||
* Escape unprintable characters using <backslash>uxxxx notation
|
||||
* for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and
|
||||
* above. If the character is printable ASCII, then do nothing
|
||||
* and return FALSE. Otherwise, append the escaped notation and
|
||||
* return TRUE.
|
||||
*/
|
||||
public static boolean escapeUnprintable(StringBuffer result, int c) {
|
||||
if (isUnprintable(c)) {
|
||||
result.append('\\');
|
||||
if ((c & ~0xFFFF) != 0) {
|
||||
result.append('U');
|
||||
result.append(DIGITS[0xF & (c >> 28)]);
|
||||
result.append(DIGITS[0xF & (c >> 24)]);
|
||||
result.append(DIGITS[0xF & (c >> 20)]);
|
||||
result.append(DIGITS[0xF & (c >> 16)]);
|
||||
} else {
|
||||
result.append('u');
|
||||
}
|
||||
result.append(DIGITS[0xF & (c >> 12)]);
|
||||
result.append(DIGITS[0xF & (c >> 8)]);
|
||||
result.append(DIGITS[0xF & (c >> 4)]);
|
||||
result.append(DIGITS[0xF & c]);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static class Lock {
|
||||
private int count;
|
||||
static class Lock {
|
||||
private int count;
|
||||
|
||||
synchronized void inc() {
|
||||
++count;
|
||||
}
|
||||
synchronized void inc() {
|
||||
++count;
|
||||
}
|
||||
|
||||
synchronized void dec() {
|
||||
--count;
|
||||
}
|
||||
synchronized void dec() {
|
||||
--count;
|
||||
}
|
||||
|
||||
synchronized int count() {
|
||||
return count;
|
||||
}
|
||||
synchronized int count() {
|
||||
return count;
|
||||
}
|
||||
|
||||
void go() {
|
||||
try {
|
||||
while (count() > 0) {
|
||||
synchronized(this) {
|
||||
notifyAll();
|
||||
}
|
||||
Thread.sleep(50);
|
||||
}
|
||||
}
|
||||
catch (InterruptedException e) {
|
||||
}
|
||||
}
|
||||
}
|
||||
void go() {
|
||||
try {
|
||||
while (count() > 0) {
|
||||
synchronized (this) {
|
||||
notifyAll();
|
||||
}
|
||||
Thread.sleep(50);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class TestThread extends Thread {
|
||||
Lock lock;
|
||||
Runnable target;
|
||||
static class TestThread extends Thread {
|
||||
Lock lock;
|
||||
Runnable target;
|
||||
|
||||
TestThread(Lock lock, Runnable target) {
|
||||
this.lock = lock;
|
||||
this.target = target;
|
||||
TestThread(Lock lock, Runnable target) {
|
||||
this.lock = lock;
|
||||
this.target = target;
|
||||
|
||||
lock.inc();
|
||||
}
|
||||
|
||||
public void run() {
|
||||
try {
|
||||
synchronized (lock) {
|
||||
lock.wait();
|
||||
}
|
||||
target.run();
|
||||
}
|
||||
catch (InterruptedException e) {
|
||||
}
|
||||
lock.inc();
|
||||
}
|
||||
|
||||
lock.dec();
|
||||
}
|
||||
}
|
||||
public void run() {
|
||||
try {
|
||||
synchronized (lock) {
|
||||
lock.wait();
|
||||
}
|
||||
target.run();
|
||||
} catch (InterruptedException e) {
|
||||
}
|
||||
|
||||
public static void runUntilDone(Runnable[] targets) {
|
||||
if (targets == null) {
|
||||
throw new IllegalArgumentException("targets is null");
|
||||
}
|
||||
if (targets.length == 0) {
|
||||
return;
|
||||
}
|
||||
lock.dec();
|
||||
}
|
||||
}
|
||||
|
||||
Lock lock = new Lock();
|
||||
for (int i = 0; i < targets.length; ++i) {
|
||||
new TestThread(lock, targets[i]).start();
|
||||
}
|
||||
public static void runUntilDone(Runnable[] targets) {
|
||||
if (targets == null) {
|
||||
throw new IllegalArgumentException("targets is null");
|
||||
}
|
||||
if (targets.length == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
lock.go();
|
||||
}
|
||||
Lock lock = new Lock();
|
||||
for (int i = 0; i < targets.length; ++i) {
|
||||
new TestThread(lock, targets[i]).start();
|
||||
}
|
||||
|
||||
lock.go();
|
||||
}
|
||||
}
|
||||
|
|
14
icu4j/src/com/ibm/icu/dev/test/manifest.test.stub
Normal file
14
icu4j/src/com/ibm/icu/dev/test/manifest.test.stub
Normal file
|
@ -0,0 +1,14 @@
|
|||
Manifest-Version: 1.0
|
||||
Class-Path: icu4j.jar
|
||||
Main-Class: com.ibm.icu.dev.test.TestAll
|
||||
|
||||
Name: com/ibm/icu/
|
||||
Specification-Title: ICU for Java Tests
|
||||
Specification-Version: 2.8
|
||||
Specification-Vendor: ICU
|
||||
Implementation-Title: ICU for Java
|
||||
Implementation-Version: 2.8.0
|
||||
Implementation-Vendor: IBM Corporation
|
||||
Implementation-Vendor-Id: com.ibm
|
||||
Copyright-Info: Copyright (c) 2000-2004, International Business Machines Corporation and others. All Rights Reserved.
|
||||
Sealed: false
|
|
@ -82,17 +82,10 @@ class NormalizerBuilder {
|
|||
*/
|
||||
private static void readExclusionList(BitSet isExcluded) throws java.io.IOException {
|
||||
if (DEBUG) System.out.println("Reading Exclusions");
|
||||
//BufferedReader in = new BufferedReader(new FileReader(COMPOSITION_EXCLUSIONS), 5*1024);
|
||||
BufferedReader in = null;
|
||||
try {
|
||||
in = TestUtil.getDataReader("unicode/CompositionExclusions.txt");
|
||||
} catch (Exception e) {
|
||||
System.err.println("Fail to read the file CompositionExclusions.txt!");
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
BufferedReader in = TestUtil.getDataReader("unicode/CompositionExclusions.txt");
|
||||
|
||||
while (true) {
|
||||
|
||||
// read a line, discarding comments and blank lines
|
||||
|
||||
String line = in.readLine();
|
||||
|
@ -511,4 +504,4 @@ class NormalizerBuilder {
|
|||
}
|
||||
return result.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,15 +5,14 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/rbbi/BreakIteratorRules_en_US_TEST.java,v $
|
||||
* $Date: 2002/02/16 03:05:14 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2004/02/06 21:53:59 $
|
||||
* $Revision: 1.9 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.rbbi;
|
||||
|
||||
import java.util.ListResourceBundle;
|
||||
import java.net.URL;
|
||||
|
||||
/**
|
||||
* This resource bundle is included for testing and demonstration purposes only.
|
||||
|
@ -22,201 +21,205 @@ import java.net.URL;
|
|||
* with good resource data (and a good dictionary file) for Thai
|
||||
*/
|
||||
public class BreakIteratorRules_en_US_TEST extends ListResourceBundle {
|
||||
private static final URL url =
|
||||
BreakIteratorRules_en_US_TEST.class.getResource("/com/ibm/data/misc/english.dict");
|
||||
private static final String DATA_NAME = "/com/ibm/icu/dev/data/rbbi/english.dict";
|
||||
|
||||
public Object[][] getContents() {
|
||||
return contents;
|
||||
}
|
||||
// calling code will handle case where dictionary does not exist
|
||||
|
||||
static final Object[][] contents = {
|
||||
// names of classes to instantiate for the different kinds of break
|
||||
// iterator. Notice we're now using DictionaryBasedBreakIterator
|
||||
// for word and line breaking.
|
||||
{ "BreakIteratorClasses",
|
||||
new String[] { "RuleBasedBreakIterator", // character-break iterator class
|
||||
"DictionaryBasedBreakIterator", // word-break iterator class
|
||||
"DictionaryBasedBreakIterator", // line-break iterator class
|
||||
"RuleBasedBreakIterator" } // sentence-break iterator class
|
||||
},
|
||||
public Object[][] getContents() {
|
||||
return new Object[][] {
|
||||
// names of classes to instantiate for the different kinds of break
|
||||
// iterator. Notice we're now using DictionaryBasedBreakIterator
|
||||
// for word and line breaking.
|
||||
{ "BreakIteratorClasses",
|
||||
new String[] {
|
||||
"RuleBasedBreakIterator",
|
||||
// character-break iterator class
|
||||
"DictionaryBasedBreakIterator",
|
||||
// word-break iterator class
|
||||
"DictionaryBasedBreakIterator",
|
||||
// line-break iterator class
|
||||
"RuleBasedBreakIterator" } // sentence-break iterator class
|
||||
},
|
||||
|
||||
// These are the same word-breaking rules as are specified in the default
|
||||
// resource, except that the Latin letters, apostrophe, and hyphen are
|
||||
// specified as dictionary characters
|
||||
{ "WordBreakRules",
|
||||
// ignore non-spacing marks, enclosing marks, and format characters,
|
||||
// all of which should not influence the algorithm
|
||||
"$_ignore_=[[:Mn:][:Me:][:Cf:]];"
|
||||
// These are the same word-breaking rules as are specified in the default
|
||||
// resource, except that the Latin letters, apostrophe, and hyphen are
|
||||
// specified as dictionary characters
|
||||
{
|
||||
"WordBreakRules",
|
||||
// ignore non-spacing marks, enclosing marks, and format characters,
|
||||
// all of which should not influence the algorithm
|
||||
"$_ignore_=[[:Mn:][:Me:][:Cf:]];"
|
||||
|
||||
// lower and upper case Roman letters, apostrophe and dash are
|
||||
// in the English dictionary
|
||||
+ "$_dictionary_=[a-zA-Z\\'\\-];"
|
||||
// lower and upper case Roman letters, apostrophe and dash are
|
||||
// in the English dictionary
|
||||
+"$_dictionary_=[a-zA-Z\\'\\-];"
|
||||
|
||||
// Hindi phrase separator, kanji, katakana, hiragana, CJK diacriticals,
|
||||
// other letters, and digits
|
||||
+ "$danda=[\u0964\u0965];"
|
||||
+ "$kanji=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];"
|
||||
+ "$kata=[\u3099-\u309c\u30a1-\u30fe];"
|
||||
+ "$hira=[\u3041-\u309e\u30fc];"
|
||||
+ "$let=[[[:L:][:Mc:]]-[$kanji$kata$hira]];"
|
||||
+ "$dgt=[:N:];"
|
||||
// Hindi phrase separator, kanji, katakana, hiragana, CJK diacriticals,
|
||||
// other letters, and digits
|
||||
+"$danda=[\u0964\u0965];"
|
||||
+ "$kanji=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];"
|
||||
+ "$kata=[\u3099-\u309c\u30a1-\u30fe];"
|
||||
+ "$hira=[\u3041-\u309e\u30fc];"
|
||||
+ "$let=[[[:L:][:Mc:]]-[$kanji$kata$hira]];"
|
||||
+ "$dgt=[:N:];"
|
||||
|
||||
// punctuation that can occur in the middle of a word: currently
|
||||
// dashes, apostrophes, and quotation marks
|
||||
+ "$mid_word=[[:Pd:]\u00ad\u2027\\\"\\\'];"
|
||||
// punctuation that can occur in the middle of a word: currently
|
||||
// dashes, apostrophes, and quotation marks
|
||||
+"$mid_word=[[:Pd:]\u00ad\u2027\\\"\\\'];"
|
||||
|
||||
// punctuation that can occur in the middle of a number: currently
|
||||
// apostrophes, quotation marks, periods, commas, and the Arabic
|
||||
// decimal point
|
||||
+ "$mid_num=[\\\"\\\'\\,\u066b\\.];"
|
||||
// punctuation that can occur in the middle of a number: currently
|
||||
// apostrophes, quotation marks, periods, commas, and the Arabic
|
||||
// decimal point
|
||||
+"$mid_num=[\\\"\\\'\\,\u066b\\.];"
|
||||
|
||||
// punctuation that can occur at the beginning of a number: currently
|
||||
// the period, the number sign, and all currency symbols except the cents sign
|
||||
+ "$pre_num=[[[:Sc:]-[\u00a2]]\\#\\.];"
|
||||
// punctuation that can occur at the beginning of a number: currently
|
||||
// the period, the number sign, and all currency symbols except the cents sign
|
||||
+"$pre_num=[[[:Sc:]-[\u00a2]]\\#\\.];"
|
||||
|
||||
// punctuation that can occur at the end of a number: currently
|
||||
// the percent, per-thousand, per-ten-thousand, and Arabic percent
|
||||
// signs, the cents sign, and the ampersand
|
||||
+ "$post_num=[\\%\\&\u00a2\u066a\u2030\u2031];"
|
||||
// punctuation that can occur at the end of a number: currently
|
||||
// the percent, per-thousand, per-ten-thousand, and Arabic percent
|
||||
// signs, the cents sign, and the ampersand
|
||||
+"$post_num=[\\%\\&\u00a2\u066a\u2030\u2031];"
|
||||
|
||||
// line separators: currently LF, FF, PS, and LS
|
||||
+ "$ls=[\n\u000c\u2028\u2029];"
|
||||
// line separators: currently LF, FF, PS, and LS
|
||||
+"$ls=[\n\u000c\u2028\u2029];"
|
||||
|
||||
// whitespace: all space separators and the tab character
|
||||
+ "$ws=[[:Zs:]\t];"
|
||||
// whitespace: all space separators and the tab character
|
||||
+"$ws=[[:Zs:]\t];"
|
||||
|
||||
// a word is a sequence of letters that may contain internal
|
||||
// punctuation, as long as it begins and ends with a letter and
|
||||
// never contains two punctuation marks in a row
|
||||
+ "$word=($let+($mid_word$let+)*$danda?);"
|
||||
// a word is a sequence of letters that may contain internal
|
||||
// punctuation, as long as it begins and ends with a letter and
|
||||
// never contains two punctuation marks in a row
|
||||
+"$word=($let+($mid_word$let+)*$danda?);"
|
||||
|
||||
// a number is a sequence of digits that may contain internal
|
||||
// punctuation, as long as it begins and ends with a digit and
|
||||
// never contains two punctuation marks in a row.
|
||||
+ "$number=($dgt+($mid_num$dgt+)*);"
|
||||
// a number is a sequence of digits that may contain internal
|
||||
// punctuation, as long as it begins and ends with a digit and
|
||||
// never contains two punctuation marks in a row.
|
||||
+"$number=($dgt+($mid_num$dgt+)*);"
|
||||
|
||||
// break after every character, with the following exceptions
|
||||
// (this will cause punctuation marks that aren't considered
|
||||
// part of words or numbers to be treated as words unto themselves)
|
||||
+ ".;"
|
||||
// break after every character, with the following exceptions
|
||||
// (this will cause punctuation marks that aren't considered
|
||||
// part of words or numbers to be treated as words unto themselves)
|
||||
+".;"
|
||||
|
||||
// keep together any sequence of contiguous words and numbers
|
||||
// (including just one of either), plus an optional trailing
|
||||
// number-suffix character
|
||||
+ "$word?($number$word)*($number$post_num?)?;"
|
||||
// keep together any sequence of contiguous words and numbers
|
||||
// (including just one of either), plus an optional trailing
|
||||
// number-suffix character
|
||||
+"$word?($number$word)*($number$post_num?)?;"
|
||||
|
||||
// keep together any sequence of contiguous words and numbers
|
||||
// that starts with a number-prefix character and a number,
|
||||
// and may end with a number-suffix character
|
||||
+ "$pre_num($number$word)*($number$post_num?)?;"
|
||||
// keep together any sequence of contiguous words and numbers
|
||||
// that starts with a number-prefix character and a number,
|
||||
// and may end with a number-suffix character
|
||||
+"$pre_num($number$word)*($number$post_num?)?;"
|
||||
|
||||
// keep together runs of whitespace (optionally with a single trailing
|
||||
// line separator or CRLF sequence)
|
||||
+ "$ws*\r?$ls?;"
|
||||
// keep together runs of whitespace (optionally with a single trailing
|
||||
// line separator or CRLF sequence)
|
||||
+"$ws*\r?$ls?;"
|
||||
|
||||
// keep together runs of Katakana
|
||||
+ "$kata*;"
|
||||
// keep together runs of Katakana
|
||||
+"$kata*;"
|
||||
|
||||
// keep together runs of Hiragana
|
||||
+ "$hira*;"
|
||||
// keep together runs of Hiragana
|
||||
+"$hira*;"
|
||||
|
||||
// keep together runs of Kanji
|
||||
+ "$kanji*;"},
|
||||
// keep together runs of Kanji
|
||||
+"$kanji*;" },
|
||||
|
||||
// These are the same line-breaking rules as are specified in the default
|
||||
// resource, except that the Latin letters, apostrophe, and hyphen are
|
||||
// specified as dictionary characters
|
||||
{ "LineBreakRules",
|
||||
// ignore non-spacing marks, enclosing marks, and format characters
|
||||
"$_ignore_=[[:Mn:][:Me:][:Cf:]];"
|
||||
// These are the same line-breaking rules as are specified in the default
|
||||
// resource, except that the Latin letters, apostrophe, and hyphen are
|
||||
// specified as dictionary characters
|
||||
{ "LineBreakRules",
|
||||
// ignore non-spacing marks, enclosing marks, and format characters
|
||||
"$_ignore_=[[:Mn:][:Me:][:Cf:]];"
|
||||
|
||||
// lower and upper case Roman letters, apostrophe and dash
|
||||
// are in the English dictionary
|
||||
+ "$_dictionary_=[a-zA-Z\\'\\-];"
|
||||
// lower and upper case Roman letters, apostrophe and dash
|
||||
// are in the English dictionary
|
||||
+"$_dictionary_=[a-zA-Z\\'\\-];"
|
||||
|
||||
// Hindi phrase separators
|
||||
+ "$danda=[\u0964\u0965];"
|
||||
// Hindi phrase separators
|
||||
+"$danda=[\u0964\u0965];"
|
||||
|
||||
// characters that always cause a break: ETX, tab, LF, FF, LS, and PS
|
||||
+ "$break=[\u0003\t\n\f\u2028\u2029];"
|
||||
// characters that always cause a break: ETX, tab, LF, FF, LS, and PS
|
||||
+"$break=[\u0003\t\n\f\u2028\u2029];"
|
||||
|
||||
// characters that always prevent a break: the non-breaking space
|
||||
// and similar characters
|
||||
+ "$nbsp=[\u00a0\u2007\u2011\ufeff];"
|
||||
// characters that always prevent a break: the non-breaking space
|
||||
// and similar characters
|
||||
+"$nbsp=[\u00a0\u2007\u2011\ufeff];"
|
||||
|
||||
// whitespace: space separators and control characters, except for
|
||||
// CR and the other characters mentioned above
|
||||
+ "$space=[[[:Zs:][:Cc:]]-[$nbsp$break\r]];"
|
||||
// whitespace: space separators and control characters, except for
|
||||
// CR and the other characters mentioned above
|
||||
+"$space=[[[:Zs:][:Cc:]]-[$nbsp$break\r]];"
|
||||
|
||||
// dashes: dash punctuation and the discretionary hyphen, except for
|
||||
// non-breaking hyphens
|
||||
+ "$dash=[[[:Pd:]\u00ad]-[$nbsp]];"
|
||||
// dashes: dash punctuation and the discretionary hyphen, except for
|
||||
// non-breaking hyphens
|
||||
+"$dash=[[[:Pd:]\u00ad]-[$nbsp]];"
|
||||
|
||||
// characters that stick to a word if they precede it: currency symbols
|
||||
// (except the cents sign) and starting punctuation
|
||||
+ "$pre_word=[[[:Sc:]-[\u00a2]][:Ps:]\\\"\\\'];"
|
||||
// characters that stick to a word if they precede it: currency symbols
|
||||
// (except the cents sign) and starting punctuation
|
||||
+"$pre_word=[[[:Sc:]-[\u00a2]][:Ps:]\\\"\\\'];"
|
||||
|
||||
// characters that stick to a word if they follow it: ending punctuation,
|
||||
// other punctuation that usually occurs at the end of a sentence,
|
||||
// small Kana characters, some CJK diacritics, etc.
|
||||
+ "$post_word=[[:Pe:]\\!\\\"\\\'\\%\\.\\,\\:\\;\\?\u00a2\u00b0\u066a\u2030-\u2034"
|
||||
+ "\u2103\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063"
|
||||
+ "\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9"
|
||||
+ "\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0c"
|
||||
+ "\uff0e\uff1f];"
|
||||
// characters that stick to a word if they follow it: ending punctuation,
|
||||
// other punctuation that usually occurs at the end of a sentence,
|
||||
// small Kana characters, some CJK diacritics, etc.
|
||||
+"$post_word=[[:Pe:]\\!\\\"\\\'\\%\\.\\,\\:\\;\\?\u00a2\u00b0\u066a\u2030-\u2034"
|
||||
+ "\u2103\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063"
|
||||
+ "\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9"
|
||||
+ "\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0c"
|
||||
+ "\uff0e\uff1f];"
|
||||
|
||||
// Kanji: actually includes both Kanji and Kana, except for small Kana and
|
||||
// CJK diacritics
|
||||
+ "$kanji=[[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa]-[$post_word$_ignore_]];"
|
||||
// Kanji: actually includes both Kanji and Kana, except for small Kana and
|
||||
// CJK diacritics
|
||||
+"$kanji=[[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa]-[$post_word$_ignore_]];"
|
||||
|
||||
// digits
|
||||
+ "$digit=[[:Nd:][:No:]];"
|
||||
// digits
|
||||
+"$digit=[[:Nd:][:No:]];"
|
||||
|
||||
// punctuation that can occur in the middle of a number: periods and commas
|
||||
+ "$mid_num=[\\.\\,];"
|
||||
// punctuation that can occur in the middle of a number: periods and commas
|
||||
+"$mid_num=[\\.\\,];"
|
||||
|
||||
// everything not mentioned above, plus the quote marks (which are both
|
||||
// <pre-word>, <post-word>, and <char>)
|
||||
+ "$char=[^$break$space$dash$kanji$nbsp$_ignore_$pre_word$post_word$mid_num$danda\r\\\"\\\'];"
|
||||
// everything not mentioned above, plus the quote marks (which are both
|
||||
// <pre-word>, <post-word>, and <char>)
|
||||
+"$char=[^$break$space$dash$kanji$nbsp$_ignore_$pre_word$post_word$mid_num$danda\r\\\"\\\'];"
|
||||
|
||||
// a "number" is a run of prefix characters and dashes, followed by one or
|
||||
// more digits with isolated number-punctuation characters interspersed
|
||||
+ "$number=([$pre_word$dash]*$digit+($mid_num$digit+)*);"
|
||||
// a "number" is a run of prefix characters and dashes, followed by one or
|
||||
// more digits with isolated number-punctuation characters interspersed
|
||||
+"$number=([$pre_word$dash]*$digit+($mid_num$digit+)*);"
|
||||
|
||||
// the basic core of a word can be either a "number" as defined above, a single
|
||||
// "Kanji" character, or a run of any number of not-explicitly-mentioned
|
||||
// characters (this includes Latin letters)
|
||||
+ "$word_core=([$pre_word$char]*|$kanji|$number);"
|
||||
// the basic core of a word can be either a "number" as defined above, a single
|
||||
// "Kanji" character, or a run of any number of not-explicitly-mentioned
|
||||
// characters (this includes Latin letters)
|
||||
+"$word_core=([$pre_word$char]*|$kanji|$number);"
|
||||
|
||||
// a word may end with an optional suffix that be either a run of one or
|
||||
// more dashes or a run of word-suffix characters, followed by an optional
|
||||
// run of whitespace
|
||||
+ "$word_suffix=(($dash+|$post_word*)$space*);"
|
||||
// a word may end with an optional suffix that be either a run of one or
|
||||
// more dashes or a run of word-suffix characters, followed by an optional
|
||||
// run of whitespace
|
||||
+"$word_suffix=(($dash+|$post_word*)$space*);"
|
||||
|
||||
// a word, thus, is an optional run of word-prefix characters, followed by
|
||||
// a word core and a word suffix (the syntax of <word-core> and <word-suffix>
|
||||
// actually allows either of them to match the empty string, putting a break
|
||||
// between things like ")(" or "aaa(aaa"
|
||||
+ "$word=($pre_word*$word_core$word_suffix);"
|
||||
// a word, thus, is an optional run of word-prefix characters, followed by
|
||||
// a word core and a word suffix (the syntax of <word-core> and <word-suffix>
|
||||
// actually allows either of them to match the empty string, putting a break
|
||||
// between things like ")(" or "aaa(aaa"
|
||||
+"$word=($pre_word*$word_core$word_suffix);"
|
||||
|
||||
// finally, the rule that does the work: Keep together any run of words that
|
||||
// are joined by runs of one or more non-breaking spaces. Also keep a trailing
|
||||
// line-break character or CRLF combination with the word. (line separators
|
||||
// "win" over nbsp's)
|
||||
+ "$word($nbsp+$word)*\r?$break?;" },
|
||||
// finally, the rule that does the work: Keep together any run of words that
|
||||
// are joined by runs of one or more non-breaking spaces. Also keep a trailing
|
||||
// line-break character or CRLF combination with the word. (line separators
|
||||
// "win" over nbsp's)
|
||||
+"$word($nbsp+$word)*\r?$break?;" },
|
||||
|
||||
// these two resources specify the pathnames of the dictionary files to
|
||||
// use for word breaking and line breaking. Both currently refer to
|
||||
// a file called english.dict placed in com.ibm.icu.impl.data
|
||||
// somewhere in the class path. It's important to note that
|
||||
// english.dict was created for testing purposes only, and doesn't
|
||||
// come anywhere close to being an exhaustive dictionary of English
|
||||
// words (basically, it contains all the words in the Declaration of
|
||||
// Independence, and the Revised Standard Version of the book of Genesis,
|
||||
// plus a few other words thrown in to show more interesting cases).
|
||||
// { "WordBreakDictionary", "com\\ibm\\text\\resources\\english.dict" },
|
||||
// { "LineBreakDictionary", "com\\ibm\\text\\resources\\english.dict" }
|
||||
{ "WordBreakDictionary", url },
|
||||
{ "LineBreakDictionary", url }
|
||||
};
|
||||
// these two resources specify the pathnames of the dictionary files to
|
||||
// use for word breaking and line breaking. Both currently refer to
|
||||
// a file called english.dict placed in com.ibm.icu.dev.data.rbbi
|
||||
// somewhere in the class path. It's important to note that
|
||||
// english.dict was created for testing purposes only, and doesn't
|
||||
// come anywhere close to being an exhaustive dictionary of English
|
||||
// words (basically, it contains all the words in the Declaration of
|
||||
// Independence, and the Revised Standard Version of the book of Genesis,
|
||||
// plus a few other words thrown in to show more interesting cases).
|
||||
// { "WordBreakDictionary", "com\\ibm\\text\\resources\\english.dict" },
|
||||
// { "LineBreakDictionary", "com\\ibm\\text\\resources\\english.dict" }
|
||||
{ "WordBreakDictionary", DATA_NAME },
|
||||
{ "LineBreakDictionary", DATA_NAME }
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,16 +5,54 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/rbbi/SimpleBITest.java,v $
|
||||
* $Date: 2003/06/03 18:49:30 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2004/02/06 21:53:59 $
|
||||
* $Revision: 1.9 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.rbbi;
|
||||
|
||||
import java.util.Locale;
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ListResourceBundle;
|
||||
import java.util.MissingResourceException;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
import com.ibm.icu.text.DictionaryBasedBreakIterator;
|
||||
import com.ibm.icu.text.RuleBasedBreakIterator;
|
||||
|
||||
// TODO: {dlf} this test currently doesn't test anything!
|
||||
// You'll notice that the resource that uses the dictionary isn't even on the resource path,
|
||||
// so the dictionary never gets used. Good thing, too, because it would throw a security
|
||||
// exception if run with a security manager. Not that it would matter, the dictionary
|
||||
// resource isn't even in the icu source tree!
|
||||
// In order to fix this:
|
||||
// 1) make sure english.dict matches the current dictionary format required by dbbi
|
||||
// 2) make sure english.dict gets included in icu4jtests.jar
|
||||
// 3) have this test use getResourceAsStream to get a stream on the dictionary, and
|
||||
// directly instantiate a DictionaryBasedBreakIterator. It can use the rules from
|
||||
// the appropriate section of ResourceBundle_en_US_TEST. I'd suggest just copying
|
||||
// the rules into this file.
|
||||
// 4) change the test text by inserting '|' at word breaks, and '||' at line breaks.
|
||||
// 5) process this text to a) create tables of break indices, and b) clean up the test
|
||||
// for the break iterator to work on
|
||||
//
|
||||
// This would NOT test the ability to load dictionary-based break iterators through our
|
||||
// normal resource mechanism. One could install such a break iterator and its
|
||||
// resources into the icu4j jar, and it would work, but there's no way to register entire
|
||||
// resources from outside yet. Even if there were, the access restrictions are a bit
|
||||
// difficult to manage, if one wanted to register a break iterator whose code and data
|
||||
// reside outside the icu4j jar. Since the code to instantiate would be going through
|
||||
// two protection domains, each domain would have to allow access to the data-- but
|
||||
// icu4j's domain wouldn't know about ours. So we could instantiate before registering
|
||||
// the break iterator, but this would mean we'd have to fully initialize the dictionary(s)
|
||||
// at instantiation time, rather than let this be deferred until they are actually needed.
|
||||
//
|
||||
// I've done items 2 and 3 above. Unfortunately, since I haven't done item 1, the
|
||||
// dictionary builder crashes. So for now I'm disabling this test. This is not
|
||||
// that important, since we have a thai dictionary that we do test thoroughly.
|
||||
//
|
||||
|
||||
public class SimpleBITest extends TestFmwk{
|
||||
public static final String testText =
|
||||
|
@ -103,21 +141,78 @@ public class SimpleBITest extends TestFmwk{
|
|||
/**
 * Command-line entry point: creates a SimpleBITest and passes the
 * command-line arguments to its run method.
 */
public static void main(String[] args) throws Exception {
|
||||
new SimpleBITest().run(args);
|
||||
}
|
||||
|
||||
/**
 * Always returns false.
 * NOTE(review): presumably this is a TestFmwk hook and returning false
 * keeps the framework from running this test while english.dict is in
 * the wrong format -- confirm against TestFmwk.
 */
protected boolean validate() {
|
||||
// TODO: remove when english.dict gets fixed
|
||||
return false;
|
||||
}
|
||||
|
||||
private BreakIterator createTestIterator(int kind) {
|
||||
final String bname = "com.ibm.icu.dev.test.rbbi.BreakIteratorRules_en_US_TEST";
|
||||
|
||||
BreakIterator iter = null;
|
||||
|
||||
ListResourceBundle bundle = null;
|
||||
try {
|
||||
Class cls = Class.forName(bname);
|
||||
bundle = (ListResourceBundle)cls.newInstance();
|
||||
}
|
||||
catch (Exception e) {
|
||||
///CLOVER:OFF
|
||||
errln("could not create bundle: " + bname + "exception: " + e.getMessage());
|
||||
///CLOVER:ON
|
||||
return null;
|
||||
}
|
||||
|
||||
final String[] kindNames = {
|
||||
"Character", "Word", "Line", "Sentence"
|
||||
};
|
||||
String rulesName = kindNames[kind] + "BreakRules";
|
||||
String dictionaryName = kindNames[kind] + "BreakDictionary";
|
||||
|
||||
String[] classNames = bundle.getStringArray("BreakIteratorClasses");
|
||||
String rules = bundle.getString(rulesName);
|
||||
if (classNames[kind].equals("RuleBasedBreakIterator")) {
|
||||
iter = new RuleBasedBreakIterator(rules);
|
||||
}
|
||||
else if (classNames[kind].equals("DictionaryBasedBreakIterator")) {
|
||||
try {
|
||||
String dictionaryPath = bundle.getString(dictionaryName);
|
||||
InputStream dictionary = bundle.getClass().getResourceAsStream(dictionaryPath);
|
||||
System.out.println("looking for " + dictionaryPath + " from " + bundle.getClass() + " returned " + dictionary);
|
||||
iter = new DictionaryBasedBreakIterator(rules, dictionary);
|
||||
}
|
||||
catch(IOException e) {
|
||||
e.printStackTrace();
|
||||
errln(e.getMessage());
|
||||
System.out.println(e); // debug
|
||||
}
|
||||
catch(MissingResourceException e) {
|
||||
errln(e.getMessage());
|
||||
System.out.println(e); // debug
|
||||
}
|
||||
}
|
||||
if (iter == null) {
|
||||
errln("could not create iterator");
|
||||
}
|
||||
|
||||
return iter;
|
||||
}
|
||||
|
||||
public void testWordBreak() throws Exception {
|
||||
BreakIterator wordBreak =(BreakIterator) BreakIterator.getWordInstance(new Locale("en", "US", "TEST"));
|
||||
BreakIterator wordBreak = createTestIterator(BreakIterator.KIND_WORD);
|
||||
int breaks = doTest(wordBreak);
|
||||
logln(String.valueOf(breaks));
|
||||
}
|
||||
|
||||
public void testLineBreak() throws Exception {
|
||||
BreakIterator lineBreak = BreakIterator.getLineInstance(new Locale("en", "US", "TEST"));
|
||||
BreakIterator lineBreak = createTestIterator(BreakIterator.KIND_LINE);
|
||||
int breaks = doTest(lineBreak);
|
||||
logln(String.valueOf(breaks));
|
||||
}
|
||||
|
||||
public void testSentenceBreak() throws Exception {
|
||||
BreakIterator sentenceBreak = BreakIterator.getSentenceInstance(new Locale("en", "US", "TEST"));
|
||||
BreakIterator sentenceBreak = createTestIterator(BreakIterator.KIND_SENTENCE);
|
||||
int breaks = doTest(sentenceBreak);
|
||||
logln(String.valueOf(breaks));
|
||||
}
|
||||
|
|
|
@ -1,41 +1,8 @@
|
|||
|
||||
// Standard extensions get all permissions by default
|
||||
|
||||
grant codeBase "file:${java.home}/lib/ext/*" {
|
||||
permission java.security.AllPermission;
|
||||
};
|
||||
|
||||
// default permissions granted to all domains
|
||||
|
||||
grant {
|
||||
// allows anyone to listen on un-privileged ports
|
||||
permission java.net.SocketPermission "localhost:1024-", "listen";
|
||||
|
||||
// "standard" properties that can be read by anyone
|
||||
|
||||
permission java.util.PropertyPermission "java.version", "read";
|
||||
permission java.util.PropertyPermission "java.vendor", "read";
|
||||
permission java.util.PropertyPermission "java.vendor.url", "read";
|
||||
permission java.util.PropertyPermission "java.class.version", "read";
|
||||
permission java.util.PropertyPermission "os.name", "read";
|
||||
permission java.util.PropertyPermission "os.version", "read";
|
||||
permission java.util.PropertyPermission "os.arch", "read";
|
||||
permission java.util.PropertyPermission "file.separator", "read";
|
||||
permission java.util.PropertyPermission "path.separator", "read";
|
||||
permission java.util.PropertyPermission "line.separator", "read";
|
||||
|
||||
permission java.util.PropertyPermission "java.specification.version", "read";
|
||||
permission java.util.PropertyPermission "java.specification.vendor", "read";
|
||||
permission java.util.PropertyPermission "java.specification.name", "read";
|
||||
|
||||
permission java.util.PropertyPermission "java.vm.specification.version", "read";
|
||||
permission java.util.PropertyPermission "java.vm.specification.vendor", "read";
|
||||
permission java.util.PropertyPermission "java.vm.specification.name", "read";
|
||||
permission java.util.PropertyPermission "java.vm.version", "read";
|
||||
permission java.util.PropertyPermission "java.vm.vendor", "read";
|
||||
permission java.util.PropertyPermission "java.vm.name", "read";
|
||||
|
||||
/// policies required by test framework
|
||||
// policies needed to run tests
|
||||
grant // codebase "file:${user.dir}/icu4jtests.jar"
|
||||
{
|
||||
// temporary for debugging
|
||||
// permission java.lang.RuntimePermission "getProtectionDomain";
|
||||
|
||||
// needed for Locale.setDefault, only used in tests and demos
|
||||
permission java.util.PropertyPermission "user.language", "write";
|
||||
|
@ -43,8 +10,8 @@ grant {
|
|||
// needed for TestUtils
|
||||
permission java.util.PropertyPermission "ICUDataPath", "read";
|
||||
permission java.util.PropertyPermission "user.dir", "read";
|
||||
// yuck, why doesn't the security code normalize file paths?!?!? supply both versions...
|
||||
permission java.io.FilePermission "src\\com\\ibm\\icu\\dev\\data\\-", "read";
|
||||
permission java.io.FilePermission "src/com/ibm/icu/dev/data/-", "read";
|
||||
|
||||
// time zone tests
|
||||
permission java.util.PropertyPermission "user.timezone", "read";
|
||||
};
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/stringprep/TestIDNA.java,v $
|
||||
* $Date: 2003/12/02 03:17:15 $
|
||||
* $Revision: 1.6 $
|
||||
* $Date: 2004/02/06 21:53:59 $
|
||||
* $Revision: 1.7 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -21,7 +21,7 @@ import com.ibm.icu.text.StringPrepParseException;
|
|||
import com.ibm.icu.text.StringPrep;
|
||||
import com.ibm.icu.text.UCharacterIterator;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.impl.LocaleUtility;
|
||||
import com.ibm.icu.impl.ICUData;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
|
||||
/**
|
||||
|
@ -294,7 +294,7 @@ public class TestIDNA extends TestFmwk {
|
|||
}
|
||||
}
|
||||
public void TestNamePrepConformance() throws Exception{
|
||||
InputStream stream = LocaleUtility.getImplDataResourceAsStream("uidna.spp");
|
||||
InputStream stream = ICUData.getRequiredStream("data/uidna.spp");
|
||||
StringPrep namePrep = new StringPrep(stream);
|
||||
for(int i=0; i<TestData.conformanceTestCases.length;i++){
|
||||
TestData.ConformanceTestCase testCase = TestData.conformanceTestCases[i];
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/timezone/TimeZoneTest.java,v $
|
||||
* $Date: 2004/01/05 23:00:14 $
|
||||
* $Revision: 1.18 $
|
||||
* $Date: 2004/02/06 21:53:59 $
|
||||
* $Revision: 1.19 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -833,7 +833,9 @@ public class TimeZoneTest extends TestFmwk
|
|||
final Class[] argtypes = new Class[0];
|
||||
java.lang.reflect.Method m = tz_java.getClass().getMethod("getDSTSavings", argtypes);
|
||||
dst_java = ((Integer) m.invoke(tz_java, args)).intValue();
|
||||
} catch (Exception e) {
|
||||
} catch (Exception e) {
|
||||
// see JDKTimeZone for the reason for this code
|
||||
dst_java = 3600000;
|
||||
}
|
||||
|
||||
com.ibm.icu.util.TimeZone tz_icu = com.ibm.icu.util.TimeZone.getTimeZone(tzName);
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/CompoundTransliteratorTest.java,v $
|
||||
* $Date: 2003/06/03 18:49:31 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2004/02/06 21:54:06 $
|
||||
* $Revision: 1.9 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -41,7 +41,7 @@ public class CompoundTransliteratorTest extends TestFmwk {
|
|||
t3=Transliterator.getInstance(names[2]);
|
||||
t4=Transliterator.getInstance(names[3]);
|
||||
}catch(IllegalArgumentException ex) {
|
||||
errln("FAIL: Transliterator construction failed");
|
||||
errln("FAIL: Transliterator construction failed" + ex.getMessage());
|
||||
throw ex;
|
||||
}
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/ICUListResourceBundleTest.java,v $
|
||||
* $Date: 2003/11/21 22:20:36 $
|
||||
* $Revision: 1.12 $
|
||||
* $Date: 2004/02/06 21:54:05 $
|
||||
* $Revision: 1.13 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -158,6 +158,8 @@ public final class ICUListResourceBundleTest extends TestFmwk
|
|||
}
|
||||
return isEqual;
|
||||
}
|
||||
|
||||
//
|
||||
public void TestAliases(){
|
||||
ResourceBundle rb = ICULocaleData.getResourceBundle("com.ibm.icu.dev.data","TestDataElements","testaliases");
|
||||
//rb.getObject("CollationElements");
|
||||
|
|
69
icu4j/src/com/ibm/icu/impl/ICUData.java
Normal file
69
icu4j/src/com/ibm/icu/impl/ICUData.java
Normal file
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* Created on Feb 4, 2004
|
||||
*/
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.net.URL;
|
||||
import java.security.AccessController;
|
||||
import java.security.PrivilegedAction;
|
||||
|
||||
/**
 * Provides access to ICU data files as InputStreams.  Implements security checking.
 *
 * <p>Resource names are handed unchanged to <code>ICUData.class.getResource</code>
 * or <code>ICUData.class.getResourceAsStream</code>, so a name should either be an
 * absolute path, or a path relative to com.ibm.icu.impl (e.g., most likely it is
 * 'data/foo').  When a security manager is installed the lookup is made inside
 * <code>AccessController.doPrivileged</code>; otherwise it is made directly.
 *
 * @author dougfelt
 */
public final class ICUData {
    /**
     * Return true if the ICU resource named resourceName can be located.
     *
     * @param resourceName an absolute path, or a path relative to com.ibm.icu.impl
     * @return true if a URL for the resource was found, false otherwise
     */
    public static boolean exists(final String resourceName) {
        URL url = null;
        if (System.getSecurityManager() != null) {
            // A security manager is installed: do the lookup as a privileged action.
            url = (URL) AccessController.doPrivileged(new PrivilegedAction() {
                public Object run() {
                    return ICUData.class.getResource(resourceName);
                }
            });
        } else {
            url = ICUData.class.getResource(resourceName);
        }
        return url != null;
    }

    /**
     * Return an InputStream on the ICU resource named resourceName.
     *
     * @param resourceName an absolute path, or a path relative to com.ibm.icu.impl
     * @param required if true, throw an InternalError instead of returning a
     *        null result when the resource cannot be located
     * @return the stream, or null if the resource was not found and required is false
     * @throws InternalError if the resource was not found and required is true
     */
    private static InputStream getStream(final String resourceName, boolean required) {
        InputStream stream = null;
        if (System.getSecurityManager() != null) {
            // A security manager is installed: open the resource as a privileged action.
            stream = (InputStream) AccessController.doPrivileged(new PrivilegedAction() {
                public Object run() {
                    return ICUData.class.getResourceAsStream(resourceName);
                }
            });
        } else {
            stream = ICUData.class.getResourceAsStream(resourceName);
        }
        if (stream == null && required) {
            throw new InternalError("could not locate data " + resourceName);
        }
        return stream;
    }

    /**
     * Convenience method that calls getStream(resourceName, false); returns null
     * when the resource cannot be located.
     */
    public static InputStream getStream(String resourceName) {
        return getStream(resourceName, false);
    }

    /**
     * Convenience method that calls getStream(resourceName, true); throws an
     * InternalError when the resource cannot be located.
     */
    public static InputStream getRequiredStream(String resourceName) {
        return getStream(resourceName, true);
    }
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/ICUListResourceBundle.java,v $
|
||||
* $Date: 2003/12/31 21:23:41 $
|
||||
* $Revision: 1.18 $
|
||||
* $Date: 2004/02/06 21:54:01 $
|
||||
* $Revision: 1.19 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -238,17 +238,16 @@ public class ICUListResourceBundle extends ListResourceBundle {
|
|||
private byte[] expanded=null;
|
||||
private String resName=null;
|
||||
public ResourceBinary(String name){
|
||||
resName=name;
|
||||
resName="data/" + name;
|
||||
}
|
||||
public Object getResource(Object obj) throws Exception{
|
||||
if(expanded==null){
|
||||
InputStream stream = obj.getClass().getResourceAsStream(resName);
|
||||
InputStream stream = ICUData.getStream(resName);
|
||||
if(stream!=null){
|
||||
//throw new MissingResourceException("",obj.getClass().getName(),resName);
|
||||
expanded = readToEOS(stream);
|
||||
return expanded;
|
||||
}
|
||||
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
@ -258,12 +257,12 @@ public class ICUListResourceBundle extends ListResourceBundle {
|
|||
private char[] expanded=null;
|
||||
private String resName=null;
|
||||
public ResourceString(String name){
|
||||
resName=name;
|
||||
resName="data/"+name;
|
||||
}
|
||||
public Object getResource(Object obj) throws Exception{
|
||||
if(expanded==null){
|
||||
// Resource strings are always UTF-8
|
||||
InputStream stream = obj.getClass().getResourceAsStream(resName);
|
||||
InputStream stream = ICUData.getStream(resName);
|
||||
if(stream!=null){
|
||||
//throw new MissingResourceException("",obj.getClass().getName(),resName);
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ public class ICULocaleData {
|
|||
private static Locale[] localeList;
|
||||
private static final String PACKAGE1 = "com.ibm.icu.impl.data";
|
||||
private static final String[] packageNames = { PACKAGE1 };
|
||||
private static boolean debug = ICUDebug.enabled("localedata");
|
||||
private static final boolean debug = ICUDebug.enabled("localedata");
|
||||
|
||||
/**
|
||||
* Returns a list of the installed locales.
|
||||
|
@ -207,10 +207,10 @@ public class ICULocaleData {
|
|||
} else {
|
||||
i = name.length();
|
||||
}
|
||||
|
||||
Class cls = ICULocaleData.class.getClassLoader().loadClass(name);
|
||||
if (ICUListResourceBundle.class.isAssignableFrom(cls)) {
|
||||
ICUListResourceBundle bx = (ICUListResourceBundle)cls.newInstance();
|
||||
|
||||
|
||||
if (parent != null) {
|
||||
bx.setParentX(parent);
|
||||
|
@ -385,6 +385,7 @@ public class ICULocaleData {
|
|||
// ignore, keep looking
|
||||
}
|
||||
catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
if (debug) {
|
||||
System.out.println(e.getMessage());
|
||||
}
|
||||
|
@ -418,7 +419,7 @@ public class ICULocaleData {
|
|||
return Collections.unmodifiableSet(set);
|
||||
}
|
||||
catch (MissingResourceException e) {
|
||||
System.out.println("couldn't find index for bundleName: " + bundleName);
|
||||
if (debug) System.out.println("couldn't find index for bundleName: " + bundleName);
|
||||
Thread.dumpStack();
|
||||
}
|
||||
return Collections.EMPTY_SET;
|
||||
|
@ -435,7 +436,7 @@ public class ICULocaleData {
|
|||
return locales;
|
||||
}
|
||||
catch (MissingResourceException e) {
|
||||
System.out.println("couldn't find index for bundleName: " + bundleName);
|
||||
if (debug) System.out.println("couldn't find index for bundleName: " + bundleName);
|
||||
Thread.dumpStack();
|
||||
}
|
||||
return new Locale[0];
|
||||
|
|
|
@ -210,8 +210,14 @@ public class JDKTimeZone extends TimeZone {
|
|||
final Class[] argtypes = new Class[0];
|
||||
Method m = zone.getClass().getMethod("getDSTSavings", argtypes);
|
||||
return ((Integer) m.invoke(zone, args)).intValue();
|
||||
} catch (Exception e) {
|
||||
// should never happen
|
||||
} catch (Exception e) {
|
||||
// if zone is in the sun.foo class hierarchy and we
|
||||
// are in a protection domain, we'll get a security
|
||||
// exception. And if we claim to support DST, but
|
||||
// return a value of 0, later java.util.SimpleTimeZone will
|
||||
// throw an IllegalArgumentException. So... fake
|
||||
// the dstoffset;
|
||||
return 3600000;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
|
|
@ -5,14 +5,13 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/LocaleUtility.java,v $
|
||||
* $Date: 2003/08/21 23:41:25 $
|
||||
* $Revision: 1.9 $
|
||||
* $Date: 2004/02/06 21:54:00 $
|
||||
* $Revision: 1.10 $
|
||||
* *****************************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
|
@ -132,10 +131,4 @@ public class LocaleUtility {
|
|||
}
|
||||
return new Locale(parts[0], parts[1], parts[2]);
|
||||
}
|
||||
|
||||
public static InputStream getImplDataResourceAsStream(String name){
|
||||
Class myClass = new LocaleUtility().getClass();
|
||||
String fullName = "data/"+name;
|
||||
return myClass.getResourceAsStream(fullName);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/NormalizerImpl.java,v $
|
||||
* $Date: 2003/11/14 00:06:08 $
|
||||
* $Revision: 1.23 $
|
||||
* $Date: 2004/02/06 21:54:00 $
|
||||
* $Revision: 1.24 $
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
|
@ -279,12 +279,12 @@ public final class NormalizerImpl {
|
|||
* Constructor
|
||||
* @exception thrown when data reading fails or data corrupted
|
||||
*/
|
||||
private NormalizerImpl() throws IOException{
|
||||
private NormalizerImpl() throws IOException {
|
||||
//data should be loaded only once
|
||||
if(!isDataLoaded){
|
||||
|
||||
// jar access
|
||||
InputStream i = getClass().getResourceAsStream(DATA_FILE_NAME);
|
||||
// jar access
|
||||
InputStream i = ICUData.getRequiredStream(DATA_FILE_NAME);
|
||||
BufferedInputStream b = new BufferedInputStream(i,DATA_BUFFER_SIZE);
|
||||
NormalizerDataReader reader = new NormalizerDataReader(b);
|
||||
|
||||
|
@ -307,7 +307,6 @@ public final class NormalizerImpl {
|
|||
normTrieImpl = new NormTrieImpl();
|
||||
auxTrieImpl = new AuxTrieImpl();
|
||||
|
||||
|
||||
// load the rest of the data and initialize the data members
|
||||
reader.read(normBytes, fcdBytes,auxBytes, extraData, combiningTable,
|
||||
canonStartSets);
|
||||
|
@ -333,7 +332,6 @@ public final class NormalizerImpl {
|
|||
);
|
||||
|
||||
b.close();
|
||||
i.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
*
|
||||
* $Source:
|
||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $
|
||||
* $Date: 2003/06/09 23:15:00 $
|
||||
* $Revision: 1.6 $
|
||||
* $Date: 2004/02/06 21:54:02 $
|
||||
* $Revision: 1.7 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -16,7 +16,7 @@ package com.ibm.icu.impl;
|
|||
import java.io.InputStream;
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
|
@ -65,7 +65,7 @@ public final class UCharacterName
|
|||
try {
|
||||
INSTANCE_ = new UCharacterName();
|
||||
}catch(IOException e){
|
||||
throw new IllegalArgumentException("Could not construct UCharacterName. Missing unames.icu?");
|
||||
throw new InternalError("Could not construct UCharacterName. Missing unames.icu?");
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new RuntimeException(e.getMessage());
|
||||
|
@ -1192,17 +1192,11 @@ public final class UCharacterName
|
|||
*/
|
||||
private UCharacterName() throws IOException
|
||||
{
|
||||
InputStream i = getClass().getResourceAsStream(NAME_FILE_NAME_);
|
||||
if(i!=null ){
|
||||
BufferedInputStream b = new BufferedInputStream(i,
|
||||
NAME_BUFFER_SIZE_);
|
||||
|
||||
UCharacterNameReader reader = new UCharacterNameReader(b);
|
||||
reader.read(this);
|
||||
} else{
|
||||
throw new IOException("unames.icu could not be opened. Is ICUModularBuild?");
|
||||
}
|
||||
i.close();
|
||||
InputStream is = ICUData.getRequiredStream(NAME_FILE_NAME_);
|
||||
BufferedInputStream b = new BufferedInputStream(is, NAME_BUFFER_SIZE_);
|
||||
UCharacterNameReader reader = new UCharacterNameReader(b);
|
||||
reader.read(this);
|
||||
b.close();
|
||||
}
|
||||
|
||||
// private methods ---------------------------------------------------
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
*
|
||||
* $Source:
|
||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterPropertyDB.java $
|
||||
* $Date: 2003/12/17 04:56:04 $
|
||||
* $Revision: 1.35 $
|
||||
* $Date: 2004/02/06 21:54:01 $
|
||||
* $Revision: 1.36 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -1594,16 +1594,12 @@ public final class UCharacterProperty implements Trie.DataManipulate
|
|||
private UCharacterProperty() throws IOException
|
||||
{
|
||||
// jar access
|
||||
InputStream i = getClass().getResourceAsStream(DATA_FILE_NAME_);
|
||||
if(i==null){
|
||||
throw new IOException("Could not load the file: "+DATA_FILE_NAME_);
|
||||
}
|
||||
BufferedInputStream b = new BufferedInputStream(i,
|
||||
DATA_BUFFER_SIZE_);
|
||||
InputStream is = ICUData.getRequiredStream(DATA_FILE_NAME_);
|
||||
BufferedInputStream b = new BufferedInputStream(is, DATA_BUFFER_SIZE_);
|
||||
UCharacterPropertyReader reader = new UCharacterPropertyReader(b);
|
||||
reader.read(this);
|
||||
b.close();
|
||||
i.close();
|
||||
|
||||
m_trie_.putIndexData(this);
|
||||
}
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
package com.ibm.icu.impl;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
import com.ibm.icu.lang.*;
|
||||
|
||||
/**
|
||||
|
@ -118,7 +119,7 @@ public final class UPropertyAliases implements ICUBinary.Authenticate {
|
|||
public UPropertyAliases() throws IOException {
|
||||
|
||||
// Open the .icu file from the jar/classpath
|
||||
InputStream is = getClass().getResourceAsStream(DATA_FILE_NAME);
|
||||
InputStream is = ICUData.getRequiredStream(DATA_FILE_NAME);
|
||||
BufferedInputStream b = new BufferedInputStream(is, DATA_BUFFER_SIZE);
|
||||
// Read and discard Unicode version...
|
||||
/* byte unicodeVersion[] = */ICUBinary.readHeader(b, DATA_FORMAT_ID, this);
|
||||
|
|
|
@ -5,24 +5,26 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/BreakIteratorRules_th.java,v $
|
||||
* $Date: 2003/07/03 17:48:12 $
|
||||
* $Revision: 1.10 $
|
||||
* $Date: 2004/02/06 21:54:04 $
|
||||
* $Revision: 1.11 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.impl.data;
|
||||
|
||||
import java.util.ListResourceBundle;
|
||||
import java.net.URL;
|
||||
|
||||
import com.ibm.icu.impl.ICUData;
|
||||
|
||||
public class BreakIteratorRules_th extends ListResourceBundle {
|
||||
public Object[][] getContents() {
|
||||
private static final String DATA_NAME = "data/BreakDictionaryData_th.brk";
|
||||
|
||||
URL url = getClass().getResource("BreakDictionaryData_th.brk");
|
||||
public Object[][] getContents() {
|
||||
final boolean exists = ICUData.exists(DATA_NAME);
|
||||
|
||||
// if dictionary wasn't found, then this resource bundle doesn't have
|
||||
// much to contribute...
|
||||
if (url == null) {
|
||||
if (!exists) {
|
||||
return new Object[0][0];
|
||||
}
|
||||
|
||||
|
@ -235,8 +237,8 @@ public class BreakIteratorRules_th extends ListResourceBundle {
|
|||
+ "\u0e25[^$paiyannoi$_ignore_]);"
|
||||
},
|
||||
|
||||
{ "WordBreakDictionary", url },
|
||||
{ "LineBreakDictionary", url }
|
||||
{ "WordBreakDictionary", DATA_NAME }, // now a path to ICU4J-specific resource
|
||||
{ "LineBreakDictionary", DATA_NAME }
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/ResourceReader.java,v $
|
||||
* $Date: 2002/08/13 23:37:48 $
|
||||
* $Revision: 1.5 $
|
||||
* $Date: 2004/02/06 21:54:04 $
|
||||
* $Revision: 1.6 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -14,6 +14,8 @@ package com.ibm.icu.impl.data;
|
|||
|
||||
import java.io.*;
|
||||
|
||||
import com.ibm.icu.impl.ICUData;
|
||||
|
||||
/**
|
||||
* A reader for text resource data in the current package. The
|
||||
* resource data is loaded through the class loader, so it will
|
||||
|
@ -47,7 +49,7 @@ public class ResourceReader {
|
|||
public ResourceReader(String resourceName, String encoding)
|
||||
throws UnsupportedEncodingException {
|
||||
|
||||
this.resourceName = resourceName;
|
||||
this.resourceName = "data/" + resourceName;
|
||||
this.encoding = encoding;
|
||||
isReset = false;
|
||||
_reset();
|
||||
|
@ -60,7 +62,7 @@ public class ResourceReader {
|
|||
* package
|
||||
*/
|
||||
public ResourceReader(String resourceName) {
|
||||
this.resourceName = resourceName;
|
||||
this.resourceName = "data/" + resourceName;
|
||||
this.encoding = null;
|
||||
isReset = false;
|
||||
try {
|
||||
|
@ -114,10 +116,11 @@ public class ResourceReader {
|
|||
if (isReset) {
|
||||
return;
|
||||
}
|
||||
InputStream is = getClass().getResourceAsStream(resourceName);
|
||||
InputStream is = ICUData.getStream(resourceName);
|
||||
if (is == null) {
|
||||
throw new IllegalArgumentException("Can't open " + resourceName);
|
||||
}
|
||||
|
||||
InputStreamReader isr =
|
||||
(encoding == null) ? new InputStreamReader(is) :
|
||||
new InputStreamReader(is, encoding);
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/lang/UCharacter.java,v $
|
||||
* $Date: 2004/01/07 20:06:24 $
|
||||
* $Revision: 1.85 $
|
||||
* $Date: 2004/02/06 21:54:00 $
|
||||
* $Revision: 1.86 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -4417,6 +4417,7 @@ public final class UCharacter
|
|||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
//throw new RuntimeException(e.getMessage());
|
||||
// Do NOT throw an exception
|
||||
// we might be building ICU modularly without names.icu and pnames.icu
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
Manifest-Version: 1.0
|
||||
|
||||
Name: com/ibm/icu
|
||||
Specification-Title: Modularized ICU for Java
|
||||
Specification-Version: 2.8
|
||||
Specification-Vendor: ICU
|
||||
|
@ -6,7 +8,7 @@ Implementation-Title: Modularized ICU for Java
|
|||
Implementation-Version: 2.8.0
|
||||
Implementation-Vendor: IBM Corporation
|
||||
Implementation-Vendor-Id: com.ibm
|
||||
Copyright-Info: Copyright (c) 2002-2003, International Business Machines Corporation and others. All Rights Reserved.
|
||||
Copyright-Info: Copyright (c) 2000-2004, International Business Machines Corporation and others. All Rights Reserved.
|
||||
Sealed: false
|
||||
|
||||
Name: com/ibm/icu
|
||||
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
Manifest-Version: 1.0
|
||||
|
||||
Name: com/ibm/icu/
|
||||
Specification-Title: ICU for Java
|
||||
Specification-Version: 2.8
|
||||
Specification-Vendor: ICU
|
||||
|
@ -6,6 +8,5 @@ Implementation-Title: ICU for Java
|
|||
Implementation-Version: 2.8.0
|
||||
Implementation-Vendor: IBM Corporation
|
||||
Implementation-Vendor-Id: com.ibm
|
||||
Copyright-Info: Copyright (c) 2002-2003, International Business Machines Corporation and others. All Rights Reserved.
|
||||
|
||||
Name: com/ibm/icu
|
||||
Copyright-Info: Copyright (c) 2000-2004, International Business Machines Corporation and others. All Rights Reserved.
|
||||
Sealed: false
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/BreakIteratorFactory.java,v $
|
||||
* $Date: 2004/01/26 23:04:28 $
|
||||
* $Revision: 1.9 $
|
||||
* $Date: 2004/02/06 21:54:03 $
|
||||
* $Revision: 1.10 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -14,11 +14,11 @@ package com.ibm.icu.text;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.URL;
|
||||
import java.util.Locale;
|
||||
import java.util.MissingResourceException;
|
||||
import java.util.ResourceBundle;
|
||||
|
||||
import com.ibm.icu.impl.ICUData;
|
||||
import com.ibm.icu.impl.ICULocaleData;
|
||||
import com.ibm.icu.impl.ICULocaleService;
|
||||
import com.ibm.icu.impl.ICUService;
|
||||
|
@ -98,28 +98,30 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim
|
|||
String rulesName,
|
||||
String dictionaryName) {
|
||||
|
||||
BreakIterator iter = null;
|
||||
ResourceBundle bundle = ICULocaleData.getResourceBundle("BreakIteratorRules", where);
|
||||
String[] classNames = bundle.getStringArray("BreakIteratorClasses");
|
||||
|
||||
String rules = bundle.getString(rulesName);
|
||||
|
||||
BreakIterator iter = null;
|
||||
|
||||
if (classNames[kind].equals("RuleBasedBreakIterator")) {
|
||||
iter = new RuleBasedBreakIterator(rules);
|
||||
}
|
||||
else if (classNames[kind].equals("DictionaryBasedBreakIterator")) {
|
||||
try {
|
||||
// System.out.println(dictionaryName);
|
||||
Object t = bundle.getObject(dictionaryName);
|
||||
// System.out.println(t);
|
||||
URL url = (URL)t;
|
||||
InputStream dictionary = url.openStream();
|
||||
InputStream dictionary = ICUData.getStream(bundle.getString(dictionaryName));
|
||||
// System.out.println("bundle: " + bundle + " dn: " + dictionaryName);
|
||||
// Object t = bundle.getObject(dictionaryName);
|
||||
// // System.out.println(t);
|
||||
// URL url = (URL)t;
|
||||
// System.out.println("url: " + url);
|
||||
// InputStream dictionary = url.openStream();
|
||||
// System.out.println("stream: " + dictionary);
|
||||
iter = new DictionaryBasedBreakIterator(rules, dictionary);
|
||||
}
|
||||
catch(IOException e) {
|
||||
System.out.println(e); // debug
|
||||
}
|
||||
catch(MissingResourceException e) {
|
||||
System.out.println(e); // debug
|
||||
}
|
||||
// TODO: we don't have 'bad' resource data, so this should never happen
|
||||
// in our current tests.
|
||||
|
|
|
@ -5,15 +5,14 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollationParsedRuleBuilder.java,v $
|
||||
* $Date: 2004/01/28 02:05:51 $
|
||||
* $Revision: 1.29 $
|
||||
* $Date: 2004/02/06 21:54:03 $
|
||||
* $Revision: 1.30 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import java.text.ParseException;
|
||||
import java.util.Hashtable;
|
||||
import java.util.Vector;
|
||||
|
@ -380,6 +379,12 @@ final class CollationParsedRuleBuilder
|
|||
* Initializing the inverse UCA
|
||||
*/
|
||||
static {
|
||||
InverseUCA temp = null;
|
||||
try {
|
||||
temp = CollatorReader.getInverseUCA();
|
||||
} catch (IOException e) {
|
||||
}
|
||||
/*
|
||||
try
|
||||
{
|
||||
String invdat = "/com/ibm/icu/impl/data/invuca.icu";
|
||||
|
@ -394,13 +399,17 @@ final class CollationParsedRuleBuilder
|
|||
e.printStackTrace();
|
||||
throw new RuntimeException(e.getMessage());
|
||||
}
|
||||
if(RuleBasedCollator.UCA_ != null) {
|
||||
if(!INVERSE_UCA_.m_UCA_version_.equals(RuleBasedCollator.UCA_.m_UCA_version_)) {
|
||||
*/
|
||||
|
||||
if(temp != null && RuleBasedCollator.UCA_ != null) {
|
||||
if(!temp.m_UCA_version_.equals(RuleBasedCollator.UCA_.m_UCA_version_)) {
|
||||
throw new RuntimeException(INV_UCA_VERSION_MISMATCH_);
|
||||
}
|
||||
} else {
|
||||
throw new RuntimeException(UCA_NOT_INSTANTIATED_);
|
||||
}
|
||||
|
||||
INVERSE_UCA_ = temp;
|
||||
}
|
||||
|
||||
// package private methods -----------------------------------------------
|
||||
|
|
|
@ -5,20 +5,26 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollatorReader.java,v $
|
||||
* $Date: 2003/11/11 20:12:31 $
|
||||
* $Revision: 1.16 $
|
||||
* $Date: 2004/02/06 21:54:02 $
|
||||
* $Revision: 1.17 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import com.ibm.icu.impl.ICUBinary;
|
||||
import com.ibm.icu.impl.ICUData;
|
||||
import com.ibm.icu.impl.IntTrie;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.util.VersionInfo;
|
||||
import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;
|
||||
import com.ibm.icu.text.RuleBasedCollator.UCAConstants;
|
||||
|
||||
/**
|
||||
* <p>Internal reader class for ICU data file uca.icu containing
|
||||
|
@ -36,16 +42,59 @@ import com.ibm.icu.util.VersionInfo;
|
|||
|
||||
final class CollatorReader
|
||||
{
|
||||
static char[] read(RuleBasedCollator rbc, UCAConstants ucac) throws IOException {
|
||||
InputStream i = ICUData.getRequiredStream("data/ucadata.icu");
|
||||
BufferedInputStream b = new BufferedInputStream(i, 90000);
|
||||
CollatorReader reader = new CollatorReader(b);
|
||||
char[] result = reader.readImp(rbc, ucac);
|
||||
b.close();
|
||||
return result;
|
||||
}
|
||||
|
||||
static void initRBC(RuleBasedCollator rbc, byte[] data) throws IOException {
|
||||
final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;
|
||||
|
||||
InputStream i = new ByteArrayInputStream(data);
|
||||
BufferedInputStream b = new BufferedInputStream(i);
|
||||
CollatorReader reader = new CollatorReader(b, false);
|
||||
if (data.length > MIN_BINARY_DATA_SIZE_) {
|
||||
reader.readImp(rbc, null);
|
||||
} else {
|
||||
reader.readHeader(rbc);
|
||||
reader.readOptions(rbc);
|
||||
// duplicating UCA_'s data
|
||||
rbc.setWithUCATables();
|
||||
}
|
||||
}
|
||||
|
||||
static InverseUCA getInverseUCA() throws IOException {
|
||||
InverseUCA result = null;
|
||||
InputStream i = ICUData.getRequiredStream("data/invuca.icu");
|
||||
// try {
|
||||
// String invdat = "/com/ibm/icu/impl/data/invuca.icu";
|
||||
// InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat);
|
||||
BufferedInputStream b = new BufferedInputStream(i, 110000);
|
||||
result = CollatorReader.readInverseUCA(b);
|
||||
b.close();
|
||||
i.close();
|
||||
return result;
|
||||
// } catch (Exception e) {
|
||||
// throw new RuntimeException(e.getMessage());
|
||||
// }
|
||||
}
|
||||
|
||||
// protected constructor ---------------------------------------------
|
||||
|
||||
/**
|
||||
* <p>Protected constructor.</p>
|
||||
* @param inputStream ICU callator file input stream
|
||||
* @param inputStream ICU collator file input stream
|
||||
* @exception IOException throw if data file fails authentication
|
||||
* @draft 2.1
|
||||
*/
|
||||
protected CollatorReader(InputStream inputStream) throws IOException
|
||||
private CollatorReader(InputStream inputStream) throws IOException
|
||||
{
|
||||
this(inputStream, true);
|
||||
/*
|
||||
byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_);
|
||||
// weiv: check that we have the correct Unicode version in
|
||||
// binary files
|
||||
|
@ -55,6 +104,7 @@ final class CollatorReader
|
|||
throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
|
||||
}
|
||||
m_dataInputStream_ = new DataInputStream(inputStream);
|
||||
*/
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -64,7 +114,7 @@ final class CollatorReader
|
|||
* @exception IOException throw if data file fails authentication
|
||||
* @draft 2.1
|
||||
*/
|
||||
protected CollatorReader(InputStream inputStream, boolean readICUHeader)
|
||||
private CollatorReader(InputStream inputStream, boolean readICUHeader)
|
||||
throws IOException
|
||||
{
|
||||
if (readICUHeader) {
|
||||
|
@ -89,7 +139,7 @@ final class CollatorReader
|
|||
* @param rbc RuleBasedCollator to populate with header information
|
||||
* @exception IOException thrown when there's a data error.
|
||||
*/
|
||||
protected void readHeader(RuleBasedCollator rbc) throws IOException
|
||||
private void readHeader(RuleBasedCollator rbc) throws IOException
|
||||
{
|
||||
m_size_ = m_dataInputStream_.readInt();
|
||||
// all the offsets are in bytes
|
||||
|
@ -192,7 +242,7 @@ final class CollatorReader
|
|||
* @exception IOException thrown when there's a data error.
|
||||
* @draft 2.2
|
||||
*/
|
||||
protected void readOptions(RuleBasedCollator rbc) throws IOException
|
||||
private void readOptions(RuleBasedCollator rbc) throws IOException
|
||||
{
|
||||
int readcount = 0;
|
||||
rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();
|
||||
|
@ -245,7 +295,7 @@ final class CollatorReader
|
|||
* @exception IOException thrown when there's a data error.
|
||||
* @draft 2.2
|
||||
*/
|
||||
protected char[] read(RuleBasedCollator rbc,
|
||||
private char[] readImp(RuleBasedCollator rbc,
|
||||
RuleBasedCollator.UCAConstants UCAConst)
|
||||
throws IOException
|
||||
{
|
||||
|
@ -421,7 +471,7 @@ final class CollatorReader
|
|||
* @exception IOException thrown when error occurs while reading the
|
||||
* inverse uca
|
||||
*/
|
||||
protected static CollationParsedRuleBuilder.InverseUCA readInverseUCA(
|
||||
private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(
|
||||
InputStream inputStream)
|
||||
throws IOException
|
||||
{
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/IDNA.java,v $
|
||||
* $Date: 2003/12/02 01:34:32 $
|
||||
* $Revision: 1.5 $
|
||||
* $Date: 2004/02/06 21:54:03 $
|
||||
* $Revision: 1.6 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -14,7 +14,7 @@ package com.ibm.icu.text;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import com.ibm.icu.impl.LocaleUtility;
|
||||
import com.ibm.icu.impl.ICUData;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -88,7 +88,7 @@ public final class IDNA {
|
|||
/* private constructor to prevent construction of the object */
|
||||
private IDNA(){
|
||||
try{
|
||||
InputStream stream = LocaleUtility.getImplDataResourceAsStream("uidna.spp");
|
||||
InputStream stream = ICUData.getRequiredStream("data/uidna.spp");
|
||||
namePrep = new StringPrep(stream);
|
||||
stream.close();
|
||||
}catch (IOException e){
|
||||
|
|
|
@ -5,16 +5,13 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java,v $
|
||||
* $Date: 2004/01/28 02:05:51 $
|
||||
* $Revision: 1.57 $
|
||||
* $Date: 2004/02/06 21:54:02 $
|
||||
* $Revision: 1.58 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.util.Locale;
|
||||
import java.util.ResourceBundle;
|
||||
import java.util.Arrays;
|
||||
|
@ -1561,6 +1558,8 @@ public final class RuleBasedCollator extends Collator
|
|||
{
|
||||
UCA_ = new RuleBasedCollator();
|
||||
UCA_CONSTANTS_ = new UCAConstants();
|
||||
UCA_CONTRACTIONS_ = CollatorReader.read(UCA_, UCA_CONSTANTS_);
|
||||
/*
|
||||
InputStream i = UCA_.getClass().getResourceAsStream(
|
||||
"/com/ibm/icu/impl/data/ucadata.icu");
|
||||
|
||||
|
@ -1569,6 +1568,7 @@ public final class RuleBasedCollator extends Collator
|
|||
UCA_CONTRACTIONS_ = reader.read(UCA_, UCA_CONSTANTS_);
|
||||
b.close();
|
||||
i.close();
|
||||
*/
|
||||
// called before doing canonical closure for the UCA.
|
||||
impCEGen_ = new ImplicitCEGenerator(UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_, UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_);
|
||||
// IMPLICIT_BASE_BYTE_ = UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_;
|
||||
|
@ -1641,10 +1641,13 @@ public final class RuleBasedCollator extends Collator
|
|||
if(rules[0][1] instanceof byte[]){
|
||||
m_rules_ = (String)rules[1][1];
|
||||
byte map[] = (byte [])rules[0][1];
|
||||
CollatorReader.initRBC(this, map);
|
||||
/*
|
||||
BufferedInputStream input =
|
||||
new BufferedInputStream(
|
||||
new ByteArrayInputStream(map));
|
||||
CollatorReader reader = new CollatorReader(input, false);
|
||||
/*
|
||||
CollatorReader reader = new CollatorReader(input, false);
|
||||
if (map.length > MIN_BINARY_DATA_SIZE_) {
|
||||
reader.read(this, null);
|
||||
}
|
||||
|
@ -1654,6 +1657,7 @@ public final class RuleBasedCollator extends Collator
|
|||
// duplicating UCA_'s data
|
||||
setWithUCATables();
|
||||
}
|
||||
*/
|
||||
// at this point, we have read in the collator
|
||||
// now we need to check whether the binary image has
|
||||
// the right UCA and other versions
|
||||
|
@ -1945,7 +1949,7 @@ public final class RuleBasedCollator extends Collator
|
|||
* Minimum size required for the binary collation data in bytes.
|
||||
* Size of UCA header + size of options to 4 bytes
|
||||
*/
|
||||
private static final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;
|
||||
//private static final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;
|
||||
|
||||
/**
|
||||
* If this collator is to generate only simple tertiaries for fast path
|
||||
|
|
Loading…
Add table
Reference in a new issue