ICU-9014 and ICU-9015 scx defaults to {sc}, and return that efficiently

X-SVN-Rev: 31328
This commit is contained in:
Markus Scherer 2012-02-03 23:39:45 +00:00
parent 8d2ddad36c
commit 2281643107
3 changed files with 62 additions and 29 deletions
icu4j/main
classes/core/src/com/ibm/icu/lang
tests
core/src/com/ibm/icu/dev/test/lang
translit/src/com/ibm/icu/dev/test/util

View file

@ -1062,6 +1062,11 @@ public final class UScript {
if(sc==script) {
return true;
}
if(sc>0x7fff) {
// Guard against bogus input that would
// make us go past the Script_Extensions terminator.
return false;
}
while(sc>scriptExtensions[scx]) {
++scx;
}
@ -1070,24 +1075,39 @@ public final class UScript {
/**
* Sets code point c's Script_Extensions as script code integers into the output BitSet.
* <ul>
* <li>If c does have Script_Extensions, then the return value is
* the negative number of Script_Extensions codes (= -set.cardinality());
* in this case, the Script property value
* (normally Common or Inherited) is not included in the set.
* <li>If c does not have Script_Extensions, then the one Script code is put into the set
* and also returned.
* <li>If c is not a valid code point, then the one {@link #UNKNOWN} code is put into the set
* and also returned.
* </ul>
* <p>In other words, if the return value is non-negative, it is c's single Script code
* and the set contains exactly this Script code.
* If the return value is -n, then the set contains c's n&gt;=2 Script_Extensions script codes.
*
* Some characters are commonly used in multiple scripts.
* <p>Some characters are commonly used in multiple scripts.
* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
*
* The Script_Extensions property is provisional. It may be modified or removed
* <p>The Script_Extensions property is provisional. It may be modified or removed
* in future versions of the Unicode Standard, and thus in ICU.
* @param c code point
* @param set set of script code integers; will be cleared, then bits are set
* corresponding to c's Script_Extensions
* @return set
* @draft ICU 4.6
* @return negative number of script codes in c's Script_Extensions,
* or the non-negative single Script value
* @draft ICU 49
* @provisional This API might change or be removed in a future release.
*/
public static final BitSet getScriptExtensions(int c, BitSet set) {
public static final int getScriptExtensions(int c, BitSet set) {
set.clear();
int scriptX=UCharacterProperty.INSTANCE.getAdditional(c, 0)&UCharacterProperty.SCRIPT_X_MASK;
if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) {
return set;
set.set(scriptX);
return scriptX;
}
char[] scriptExtensions=UCharacterProperty.INSTANCE.m_scriptExtensions_;
@ -1095,12 +1115,15 @@ public final class UScript {
if(scriptX>=UCharacterProperty.SCRIPT_X_WITH_OTHER) {
scx=scriptExtensions[scx+1];
}
int length=0;
int sx;
do {
sx=scriptExtensions[scx++];
set.set(sx&0x7fff);
++length;
} while(sx<0x8000);
return set;
// length==set.cardinality()
return -length;
}
/**

View file

@ -338,7 +338,7 @@ public class TestUScript extends TestFmwk {
!UScript.hasScript(0x063f, UScript.SYRIAC) &&
!UScript.hasScript(0x063f, UScript.THAANA))
) {
errln("UScript.hasScript(U+063F, ...) is wrong\n");
errln("UScript.hasScript(U+063F, ...) is wrong");
}
if(!(
UScript.hasScript(0x0640, UScript.COMMON) && /* main Script value */
@ -346,7 +346,7 @@ public class TestUScript extends TestFmwk {
UScript.hasScript(0x0640, UScript.SYRIAC) &&
!UScript.hasScript(0x0640, UScript.THAANA))
) {
errln("UScript.hasScript(U+0640, ...) is wrong\n");
errln("UScript.hasScript(U+0640, ...) is wrong");
}
if(!(
UScript.hasScript(0x0650, UScript.INHERITED) && /* main Script value */
@ -354,7 +354,7 @@ public class TestUScript extends TestFmwk {
UScript.hasScript(0x0650, UScript.SYRIAC) &&
!UScript.hasScript(0x0650, UScript.THAANA))
) {
errln("UScript.hasScript(U+0650, ...) is wrong\n");
errln("UScript.hasScript(U+0650, ...) is wrong");
}
if(!(
UScript.hasScript(0x0660, UScript.COMMON) && /* main Script value */
@ -362,7 +362,7 @@ public class TestUScript extends TestFmwk {
!UScript.hasScript(0x0660, UScript.SYRIAC) &&
UScript.hasScript(0x0660, UScript.THAANA))
) {
errln("UScript.hasScript(U+0660, ...) is wrong\n");
errln("UScript.hasScript(U+0660, ...) is wrong");
}
if(!(
!UScript.hasScript(0xfdf2, UScript.COMMON) &&
@ -370,28 +370,43 @@ public class TestUScript extends TestFmwk {
!UScript.hasScript(0xfdf2, UScript.SYRIAC) &&
UScript.hasScript(0xfdf2, UScript.THAANA))
) {
errln("UScript.hasScript(U+FDF2, ...) is wrong\n");
errln("UScript.hasScript(U+FDF2, ...) is wrong");
}
if(UScript.hasScript(0x0640, 0xaffe)) {
// An unguarded implementation might go into an infinite loop.
errln("UScript.hasScript(U+0640, bogus 0xaffe) is wrong");
}
}
public void TestGetScriptExtensions() {
BitSet scripts=new BitSet(UScript.CODE_LIMIT);
/* normal usage */
if(!UScript.getScriptExtensions(0x063f, scripts).isEmpty()) {
errln("UScript.getScriptExtensions(U+063F) is not empty");
/* invalid code points */
if(UScript.getScriptExtensions(-1, scripts)!=UScript.UNKNOWN || scripts.cardinality()!=1 ||
!scripts.get(UScript.UNKNOWN)) {
errln("UScript.getScriptExtensions(-1) is not {UNKNOWN}");
}
if(UScript.getScriptExtensions(0x0640, scripts).cardinality()!=3 ||
if(UScript.getScriptExtensions(0x110000, scripts)!=UScript.UNKNOWN || scripts.cardinality()!=1 ||
!scripts.get(UScript.UNKNOWN)) {
errln("UScript.getScriptExtensions(0x110000) is not {UNKNOWN}");
}
/* normal usage */
if(UScript.getScriptExtensions(0x063f, scripts)!=UScript.ARABIC || scripts.cardinality()!=1 ||
!scripts.get(UScript.ARABIC)) {
errln("UScript.getScriptExtensions(U+063F) is not {ARABIC}");
}
if(UScript.getScriptExtensions(0x0640, scripts)!=-3 || scripts.cardinality()!=3 ||
!scripts.get(UScript.ARABIC) || !scripts.get(UScript.SYRIAC) || !scripts.get(UScript.MANDAIC)
) {
errln("UScript.getScriptExtensions(U+0640) failed");
}
UScript.getScriptExtensions(0xfdf2, scripts);
if(scripts.cardinality()!=2 || !scripts.get(UScript.ARABIC) || !scripts.get(UScript.THAANA)) {
if(UScript.getScriptExtensions(0xfdf2, scripts)!=-2 || scripts.cardinality()!=2 ||
!scripts.get(UScript.ARABIC) || !scripts.get(UScript.THAANA)) {
errln("UScript.getScriptExtensions(U+FDF2) failed");
}
UScript.getScriptExtensions(0xff65, scripts);
if(scripts.cardinality()!=6 || !scripts.get(UScript.BOPOMOFO) || !scripts.get(UScript.YI)) {
if(UScript.getScriptExtensions(0xff65, scripts)!=-6 || scripts.cardinality()!=6 ||
!scripts.get(UScript.BOPOMOFO) || !scripts.get(UScript.YI)) {
errln("UScript.getScriptExtensions(U+FF65) failed");
}
}

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2002-2011, International Business Machines Corporation and *
* Copyright (C) 2002-2012, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -452,15 +452,10 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
}
static BitSet BITSET = new BitSet();
/**
* @param codePoint
* @return
*/
public static synchronized String getStringScriptExtensions(int codePoint) {
UScript.getScriptExtensions(codePoint, BITSET);
if (BITSET.cardinality() == 0) {
int scriptCode = UScript.getScript(codePoint);
return UScript.getName(scriptCode);
int result = UScript.getScriptExtensions(codePoint, BITSET);
if (result >= 0) {
return UScript.getName(result);
}
TreeMap<String,String> sorted = new TreeMap<String,String>();
for (int scriptCode = BITSET.nextSetBit(0); scriptCode >= 0; scriptCode = BITSET.nextSetBit(scriptCode+1)) {