From b4c271507304a61680920f25ed18d87b5075ecb5 Mon Sep 17 00:00:00 2001 From: John Fitzpatrick Date: Fri, 10 Mar 2000 00:20:05 +0000 Subject: [PATCH] Fixed test failure. ClassCastException because the dictionary name wasn't a URL as expected by BreakIterator X-SVN-Rev: 934 --- .../rbbi/BreakIteratorRules_en_US_TEST.java | 166 +++++++++--------- .../ibm/icu/dev/test/rbbi/SimpleBITest.java | 6 +- .../rbbi/BreakIteratorRules_en_US_TEST.java | 166 +++++++++--------- icu4j/src/com/ibm/test/rbbi/SimpleBITest.java | 6 +- 4 files changed, 180 insertions(+), 164 deletions(-) diff --git a/icu4j/src/com/ibm/icu/dev/test/rbbi/BreakIteratorRules_en_US_TEST.java b/icu4j/src/com/ibm/icu/dev/test/rbbi/BreakIteratorRules_en_US_TEST.java index 61ee141f39d..9704878e0a8 100755 --- a/icu4j/src/com/ibm/icu/dev/test/rbbi/BreakIteratorRules_en_US_TEST.java +++ b/icu4j/src/com/ibm/icu/dev/test/rbbi/BreakIteratorRules_en_US_TEST.java @@ -13,6 +13,7 @@ package com.ibm.text.resources; import java.util.ListResourceBundle; +import java.net.URL; /** * This resource bundle is included for testing and demonstration purposes only. @@ -22,84 +23,91 @@ import java.util.ListResourceBundle; */ public class BreakIteratorRules_en_US_TEST extends ListResourceBundle { public Object[][] getContents() { - return contents; - } + URL url = getClass().getResource("english.dict"); - static final Object[][] contents = { - // names of classes to instantiate for the different kinds of break - // iterator. Notice we're now using DictionaryBasedBreakIterator - // for word and line breaking. - { "BreakIteratorClasses", - new String[] { "RuleBasedBreakIterator", // character-break iterator class - "DictionaryBasedBreakIterator", // word-break iterator class - "DictionaryBasedBreakIterator", // line-break iterator class - "RuleBasedBreakIterator" } // sentence-break iterator class - }, - - // These are the same word-breaking rules as are specified in the default - // resource, except that the Latin letters, apostrophe, and hyphen are - // specified as dictionary characters - { "WordBreakRules", - "=[:Mn::Me::Cf:];" - + "=[a-zA-z\\'\\-];" - + "=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];" - + "=[\u30a1-\u30fa];" - + "=[\u3041-\u3094];" - + "=[\u3099-\u309c];" - + "=[:L:^[]];" - + "=[:N:];" - + "=[:Pd:\u00ad\u2027\\\"\\\'\\.];" - + "=[\\\"\\\'\\,\u066b\\.];" - + "=[:Sc:\\#\\.^\u00a2];" - + "=[\\%\\&\u00a2\u066a\u2030\u2031];" - + "=[\n\u000c\u2028\u2029];" - + "=[:Zs:\t];" - + "=(*(*)*|[a-zA-Z][a-z\\'\\-]*);" - + "=(*(*)*);" - + ".;" - + "{}()*{{}};" - + "()*{{}};" - + "*{\r}{};" - + "[]*;" - + "[]*;" - + "*;" }, - - // These are the same line-breaking rules as are specified in the default - // resource, except that the Latin letters, apostrophe, and hyphen are - // specified as dictionary characters - { "LineBreakRules", - "=[:Mn::Me::Cf:];" - + "=[a-zA-z\\'\\-];" - + "=[\u0003\t\n\f\u2028\u2029];" - + "=[\u00a0\u2007\u2011\ufeff];" - + "=[:Zs::Cc:^[\r]];" - + "=[:Pd:\u00ad^];" - + "=[:Sc::Ps:^\u00a2];" - + "=[:Pe:\\!\\%\\.\\,\\:\\;\\?\u00a2\u00b0\u066a\u2030-\u2034\u2103" - + "\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063" - + "\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9" - + "\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0e" - + "\uff1f];" - + "=[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa^[]];" - + "=[:Nd::No:];" - + "=[\\.\\,];" - + "=[^[\r]];" - + "=([]**(*)*);" - + "=(*|||[a-zA-Z][a-z\\'\\-]*);" - + "=((*|*)*);" - + "=(*);" - + "(*)*{\r}{};" }, - - // these two resources specify the pathnames of the dictionary files to - // use for word breaking and line breaking. Both currently refer to - // a file called english.dict placed in com\ibm\text\resources - // somewhere in the class path. It's important to note that - // english.dict was created for testing purposes only, and doesn't - // come anywhere close to being an exhaustive dictionary of English - // words (basically, it contains all the words in the Declaration of - // Independence, and the Revised Standard Version of the book of Genesis, - // plus a few other words thrown in to show more interesting cases). - { "WordBreakDictionary", "com\\ibm\\text\\resources\\english.dict" }, - { "LineBreakDictionary", "com\\ibm\\text\\resources\\english.dict" } - }; + // if dictionary wasn't found, then this resource bundle doesn't have + // much to contribute... + if (url == null) { + return new Object[0][0]; + } + return new Object[][] { + // names of classes to instantiate for the different kinds of break + // iterator. Notice we're now using DictionaryBasedBreakIterator + // for word and line breaking. + { "BreakIteratorClasses", + new String[] { "RuleBasedBreakIterator", // character-break iterator class + "DictionaryBasedBreakIterator", // word-break iterator class + "DictionaryBasedBreakIterator", // line-break iterator class + "RuleBasedBreakIterator" } // sentence-break iterator class + }, + + // These are the same word-breaking rules as are specified in the default + // resource, except that the Latin letters, apostrophe, and hyphen are + // specified as dictionary characters + { "WordBreakRules", + "$ignore=[[:Mn:][:Me:][:Cf:]];" + + "dictionary=[a-zA-z\\'\\-];" + + "kanji=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];" + + "kata=[\u30a1-\u30fa];" + + "hira=[\u3041-\u3094];" + + "cjk-diacrit=[\u3099-\u309c];" + + "let=[[:L:]^[{kanji}{kata}{hira}{cjk-diacrit}{dictionary}]];" + + "dgt=[[:N:]];" + + "mid-word=[[:Pd:]\u00ad\u2027\\\"\\\'\\.];" + + "mid-num=[\\\"\\\'\\,\u066b\\.];" + + "pre-num=[[:Sc:]\\#\\.^\u00a2];" + + "post-num=[\\%\\&\u00a2\u066a\u2030\u2031];" + + "ls=[\n\u000c\u2028\u2029];" + + "ws=[[:Zs:]\t];" + + "word=({let}{let}*({mid-word}{let}{let}*)*|[a-zA-Z][a-z\\'\\-]*);" + + "number=({dgt}{dgt}*({mid-num}{dgt}{dgt}*)*);" + + ".;" + + "{{word}}({number}{word})*{{number}{{post-num}}};" + + "{pre-num}({number}{word})*{{number}{{post-num}}};" + + "{ws}*{\r}{{ls}};" + + "[{kata}{cjk-diacrit}]*;" + + "[{hira}{cjk-diacrit}]*;" + + "{kanji}*;" }, + + // These are the same line-breaking rules as are specified in the default + // resource, except that the Latin letters, apostrophe, and hyphen are + // specified as dictionary characters + { "LineBreakRules", + "=[:Mn::Me::Cf:];" + + "=[a-zA-z\\'\\-];" + + "=[\u0003\t\n\f\u2028\u2029];" + + "=[\u00a0\u2007\u2011\ufeff];" + + "=[:Zs::Cc:^[\r]];" + + "=[:Pd:\u00ad^];" + + "=[:Sc::Ps:^\u00a2];" + + "=[:Pe:\\!\\%\\.\\,\\:\\;\\?\u00a2\u00b0\u066a\u2030-\u2034\u2103" + + "\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063" + + "\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9" + + "\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0e" + + "\uff1f];" + + "=[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa^[]];" + + "=[:Nd::No:];" + + "=[\\.\\,];" + + "=[^[\r]];" + + "=([]**(*)*);" + + "=(*|||[a-zA-Z][a-z\\'\\-]*);" + + "=((*|*)*);" + + "=(*);" + + "(*)*{\r}{};" }, + + // these two resources specify the pathnames of the dictionary files to + // use for word breaking and line breaking. Both currently refer to + // a file called english.dict placed in com\ibm\text\resources + // somewhere in the class path. It's important to note that + // english.dict was created for testing purposes only, and doesn't + // come anywhere close to being an exhaustive dictionary of English + // words (basically, it contains all the words in the Declaration of + // Independence, and the Revised Standard Version of the book of Genesis, + // plus a few other words thrown in to show more interesting cases). + // { "WordBreakDictionary", "com\\ibm\\text\\resources\\english.dict" }, + // { "LineBreakDictionary", "com\\ibm\\text\\resources\\english.dict" } + { "WordBreakDictionary", url }, + { "LineBreakDictionary", url } + }; + } } diff --git a/icu4j/src/com/ibm/icu/dev/test/rbbi/SimpleBITest.java b/icu4j/src/com/ibm/icu/dev/test/rbbi/SimpleBITest.java index 898063870ca..d6b75925bc9 100755 --- a/icu4j/src/com/ibm/icu/dev/test/rbbi/SimpleBITest.java +++ b/icu4j/src/com/ibm/icu/dev/test/rbbi/SimpleBITest.java @@ -107,19 +107,19 @@ public class SimpleBITest extends TestFmwk{ public void testWordBreak() throws Exception { BreakIterator wordBreak = BreakIterator.getWordInstance(new Locale("en", "US", "TEST")); int breaks = doTest(wordBreak); - errln(String.valueOf(breaks)); + logln(String.valueOf(breaks)); } public void testLineBreak() throws Exception { BreakIterator lineBreak = BreakIterator.getLineInstance(new Locale("en", "US", "TEST")); int breaks = doTest(lineBreak); - errln(String.valueOf(breaks)); + logln(String.valueOf(breaks)); } public void testSentenceBreak() throws Exception { BreakIterator sentenceBreak = BreakIterator.getSentenceInstance(new Locale("en", "US", "TEST")); int breaks = doTest(sentenceBreak); - errln(String.valueOf(breaks)); + logln(String.valueOf(breaks)); } private int doTest(BreakIterator bi) { diff --git a/icu4j/src/com/ibm/test/rbbi/BreakIteratorRules_en_US_TEST.java b/icu4j/src/com/ibm/test/rbbi/BreakIteratorRules_en_US_TEST.java index 61ee141f39d..9704878e0a8 100755 --- a/icu4j/src/com/ibm/test/rbbi/BreakIteratorRules_en_US_TEST.java +++ b/icu4j/src/com/ibm/test/rbbi/BreakIteratorRules_en_US_TEST.java @@ -13,6 +13,7 @@ package com.ibm.text.resources; import java.util.ListResourceBundle; +import java.net.URL; /** * This resource bundle is included for testing and demonstration purposes only. @@ -22,84 +23,91 @@ import java.util.ListResourceBundle; */ public class BreakIteratorRules_en_US_TEST extends ListResourceBundle { public Object[][] getContents() { - return contents; - } + URL url = getClass().getResource("english.dict"); - static final Object[][] contents = { - // names of classes to instantiate for the different kinds of break - // iterator. Notice we're now using DictionaryBasedBreakIterator - // for word and line breaking. - { "BreakIteratorClasses", - new String[] { "RuleBasedBreakIterator", // character-break iterator class - "DictionaryBasedBreakIterator", // word-break iterator class - "DictionaryBasedBreakIterator", // line-break iterator class - "RuleBasedBreakIterator" } // sentence-break iterator class - }, - - // These are the same word-breaking rules as are specified in the default - // resource, except that the Latin letters, apostrophe, and hyphen are - // specified as dictionary characters - { "WordBreakRules", - "=[:Mn::Me::Cf:];" - + "=[a-zA-z\\'\\-];" - + "=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];" - + "=[\u30a1-\u30fa];" - + "=[\u3041-\u3094];" - + "=[\u3099-\u309c];" - + "=[:L:^[]];" - + "=[:N:];" - + "=[:Pd:\u00ad\u2027\\\"\\\'\\.];" - + "=[\\\"\\\'\\,\u066b\\.];" - + "=[:Sc:\\#\\.^\u00a2];" - + "=[\\%\\&\u00a2\u066a\u2030\u2031];" - + "=[\n\u000c\u2028\u2029];" - + "=[:Zs:\t];" - + "=(*(*)*|[a-zA-Z][a-z\\'\\-]*);" - + "=(*(*)*);" - + ".;" - + "{}()*{{}};" - + "()*{{}};" - + "*{\r}{};" - + "[]*;" - + "[]*;" - + "*;" }, - - // These are the same line-breaking rules as are specified in the default - // resource, except that the Latin letters, apostrophe, and hyphen are - // specified as dictionary characters - { "LineBreakRules", - "=[:Mn::Me::Cf:];" - + "=[a-zA-z\\'\\-];" - + "=[\u0003\t\n\f\u2028\u2029];" - + "=[\u00a0\u2007\u2011\ufeff];" - + "=[:Zs::Cc:^[\r]];" - + "=[:Pd:\u00ad^];" - + "=[:Sc::Ps:^\u00a2];" - + "=[:Pe:\\!\\%\\.\\,\\:\\;\\?\u00a2\u00b0\u066a\u2030-\u2034\u2103" - + "\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063" - + "\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9" - + "\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0e" - + "\uff1f];" - + "=[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa^[]];" - + "=[:Nd::No:];" - + "=[\\.\\,];" - + "=[^[\r]];" - + "=([]**(*)*);" - + "=(*|||[a-zA-Z][a-z\\'\\-]*);" - + "=((*|*)*);" - + "=(*);" - + "(*)*{\r}{};" }, - - // these two resources specify the pathnames of the dictionary files to - // use for word breaking and line breaking. Both currently refer to - // a file called english.dict placed in com\ibm\text\resources - // somewhere in the class path. It's important to note that - // english.dict was created for testing purposes only, and doesn't - // come anywhere close to being an exhaustive dictionary of English - // words (basically, it contains all the words in the Declaration of - // Independence, and the Revised Standard Version of the book of Genesis, - // plus a few other words thrown in to show more interesting cases). - { "WordBreakDictionary", "com\\ibm\\text\\resources\\english.dict" }, - { "LineBreakDictionary", "com\\ibm\\text\\resources\\english.dict" } - }; + // if dictionary wasn't found, then this resource bundle doesn't have + // much to contribute... + if (url == null) { + return new Object[0][0]; + } + return new Object[][] { + // names of classes to instantiate for the different kinds of break + // iterator. Notice we're now using DictionaryBasedBreakIterator + // for word and line breaking. + { "BreakIteratorClasses", + new String[] { "RuleBasedBreakIterator", // character-break iterator class + "DictionaryBasedBreakIterator", // word-break iterator class + "DictionaryBasedBreakIterator", // line-break iterator class + "RuleBasedBreakIterator" } // sentence-break iterator class + }, + + // These are the same word-breaking rules as are specified in the default + // resource, except that the Latin letters, apostrophe, and hyphen are + // specified as dictionary characters + { "WordBreakRules", + "$ignore=[[:Mn:][:Me:][:Cf:]];" + + "dictionary=[a-zA-z\\'\\-];" + + "kanji=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];" + + "kata=[\u30a1-\u30fa];" + + "hira=[\u3041-\u3094];" + + "cjk-diacrit=[\u3099-\u309c];" + + "let=[[:L:]^[{kanji}{kata}{hira}{cjk-diacrit}{dictionary}]];" + + "dgt=[[:N:]];" + + "mid-word=[[:Pd:]\u00ad\u2027\\\"\\\'\\.];" + + "mid-num=[\\\"\\\'\\,\u066b\\.];" + + "pre-num=[[:Sc:]\\#\\.^\u00a2];" + + "post-num=[\\%\\&\u00a2\u066a\u2030\u2031];" + + "ls=[\n\u000c\u2028\u2029];" + + "ws=[[:Zs:]\t];" + + "word=({let}{let}*({mid-word}{let}{let}*)*|[a-zA-Z][a-z\\'\\-]*);" + + "number=({dgt}{dgt}*({mid-num}{dgt}{dgt}*)*);" + + ".;" + + "{{word}}({number}{word})*{{number}{{post-num}}};" + + "{pre-num}({number}{word})*{{number}{{post-num}}};" + + "{ws}*{\r}{{ls}};" + + "[{kata}{cjk-diacrit}]*;" + + "[{hira}{cjk-diacrit}]*;" + + "{kanji}*;" }, + + // These are the same line-breaking rules as are specified in the default + // resource, except that the Latin letters, apostrophe, and hyphen are + // specified as dictionary characters + { "LineBreakRules", + "=[:Mn::Me::Cf:];" + + "=[a-zA-z\\'\\-];" + + "=[\u0003\t\n\f\u2028\u2029];" + + "=[\u00a0\u2007\u2011\ufeff];" + + "=[:Zs::Cc:^[\r]];" + + "=[:Pd:\u00ad^];" + + "=[:Sc::Ps:^\u00a2];" + + "=[:Pe:\\!\\%\\.\\,\\:\\;\\?\u00a2\u00b0\u066a\u2030-\u2034\u2103" + + "\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063" + + "\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9" + + "\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0e" + + "\uff1f];" + + "=[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa^[]];" + + "=[:Nd::No:];" + + "=[\\.\\,];" + + "=[^[\r]];" + + "=([]**(*)*);" + + "=(*|||[a-zA-Z][a-z\\'\\-]*);" + + "=((*|*)*);" + + "=(*);" + + "(*)*{\r}{};" }, + + // these two resources specify the pathnames of the dictionary files to + // use for word breaking and line breaking. Both currently refer to + // a file called english.dict placed in com\ibm\text\resources + // somewhere in the class path. It's important to note that + // english.dict was created for testing purposes only, and doesn't + // come anywhere close to being an exhaustive dictionary of English + // words (basically, it contains all the words in the Declaration of + // Independence, and the Revised Standard Version of the book of Genesis, + // plus a few other words thrown in to show more interesting cases). + // { "WordBreakDictionary", "com\\ibm\\text\\resources\\english.dict" }, + // { "LineBreakDictionary", "com\\ibm\\text\\resources\\english.dict" } + { "WordBreakDictionary", url }, + { "LineBreakDictionary", url } + }; + } } diff --git a/icu4j/src/com/ibm/test/rbbi/SimpleBITest.java b/icu4j/src/com/ibm/test/rbbi/SimpleBITest.java index 898063870ca..d6b75925bc9 100755 --- a/icu4j/src/com/ibm/test/rbbi/SimpleBITest.java +++ b/icu4j/src/com/ibm/test/rbbi/SimpleBITest.java @@ -107,19 +107,19 @@ public class SimpleBITest extends TestFmwk{ public void testWordBreak() throws Exception { BreakIterator wordBreak = BreakIterator.getWordInstance(new Locale("en", "US", "TEST")); int breaks = doTest(wordBreak); - errln(String.valueOf(breaks)); + logln(String.valueOf(breaks)); } public void testLineBreak() throws Exception { BreakIterator lineBreak = BreakIterator.getLineInstance(new Locale("en", "US", "TEST")); int breaks = doTest(lineBreak); - errln(String.valueOf(breaks)); + logln(String.valueOf(breaks)); } public void testSentenceBreak() throws Exception { BreakIterator sentenceBreak = BreakIterator.getSentenceInstance(new Locale("en", "US", "TEST")); int breaks = doTest(sentenceBreak); - errln(String.valueOf(breaks)); + logln(String.valueOf(breaks)); } private int doTest(BreakIterator bi) {