mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 17:01:16 +00:00
ICU-9014 and ICU-9015: scx defaults to {sc}, and getScriptExtensions() returns that efficiently
X-SVN-Rev: 31328
This commit is contained in:
parent
8d2ddad36c
commit
2281643107
3 changed files with 62 additions and 29 deletions
icu4j/main
classes/core/src/com/ibm/icu/lang
tests
core/src/com/ibm/icu/dev/test/lang
translit/src/com/ibm/icu/dev/test/util
|
@ -1062,6 +1062,11 @@ public final class UScript {
|
|||
if(sc==script) {
|
||||
return true;
|
||||
}
|
||||
if(sc>0x7fff) {
|
||||
// Guard against bogus input that would
|
||||
// make us go past the Script_Extensions terminator.
|
||||
return false;
|
||||
}
|
||||
while(sc>scriptExtensions[scx]) {
|
||||
++scx;
|
||||
}
|
||||
|
@ -1070,24 +1075,39 @@ public final class UScript {
|
|||
|
||||
/**
|
||||
* Sets code point c's Script_Extensions as script code integers into the output BitSet.
|
||||
* <ul>
|
||||
* <li>If c does have Script_Extensions, then the return value is
|
||||
* the negative number of Script_Extensions codes (= -set.cardinality());
|
||||
* in this case, the Script property value
|
||||
* (normally Common or Inherited) is not included in the set.
|
||||
* <li>If c does not have Script_Extensions, then the one Script code is put into the set
|
||||
* and also returned.
|
||||
* <li>If c is not a valid code point, then the one {@link #UNKNOWN} code is put into the set
|
||||
* and also returned.
|
||||
* </ul>
|
||||
* In other words, if the return value is non-negative, it is c's single Script code
|
||||
* and the set contains exactly this Script code.
|
||||
* If the return value is -n, then the set contains c's n>=2 Script_Extensions script codes.
|
||||
*
|
||||
* Some characters are commonly used in multiple scripts.
|
||||
* <p>Some characters are commonly used in multiple scripts.
|
||||
* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
|
||||
*
|
||||
* The Script_Extensions property is provisional. It may be modified or removed
|
||||
* <p>The Script_Extensions property is provisional. It may be modified or removed
|
||||
* in future versions of the Unicode Standard, and thus in ICU.
|
||||
* @param c code point
|
||||
* @param set set of script code integers; will be cleared, then bits are set
|
||||
* corresponding to c's Script_Extensions
|
||||
* @return set
|
||||
* @draft ICU 4.6
|
||||
* @return negative number of script codes in c's Script_Extensions,
|
||||
* or the non-negative single Script value
|
||||
* @draft ICU 49
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final BitSet getScriptExtensions(int c, BitSet set) {
|
||||
public static final int getScriptExtensions(int c, BitSet set) {
|
||||
set.clear();
|
||||
int scriptX=UCharacterProperty.INSTANCE.getAdditional(c, 0)&UCharacterProperty.SCRIPT_X_MASK;
|
||||
if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) {
|
||||
return set;
|
||||
set.set(scriptX);
|
||||
return scriptX;
|
||||
}
|
||||
|
||||
char[] scriptExtensions=UCharacterProperty.INSTANCE.m_scriptExtensions_;
|
||||
|
@ -1095,12 +1115,15 @@ public final class UScript {
|
|||
if(scriptX>=UCharacterProperty.SCRIPT_X_WITH_OTHER) {
|
||||
scx=scriptExtensions[scx+1];
|
||||
}
|
||||
int length=0;
|
||||
int sx;
|
||||
do {
|
||||
sx=scriptExtensions[scx++];
|
||||
set.set(sx&0x7fff);
|
||||
++length;
|
||||
} while(sx<0x8000);
|
||||
return set;
|
||||
// length==set.cardinality()
|
||||
return -length;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -338,7 +338,7 @@ public class TestUScript extends TestFmwk {
|
|||
!UScript.hasScript(0x063f, UScript.SYRIAC) &&
|
||||
!UScript.hasScript(0x063f, UScript.THAANA))
|
||||
) {
|
||||
errln("UScript.hasScript(U+063F, ...) is wrong\n");
|
||||
errln("UScript.hasScript(U+063F, ...) is wrong");
|
||||
}
|
||||
if(!(
|
||||
UScript.hasScript(0x0640, UScript.COMMON) && /* main Script value */
|
||||
|
@ -346,7 +346,7 @@ public class TestUScript extends TestFmwk {
|
|||
UScript.hasScript(0x0640, UScript.SYRIAC) &&
|
||||
!UScript.hasScript(0x0640, UScript.THAANA))
|
||||
) {
|
||||
errln("UScript.hasScript(U+0640, ...) is wrong\n");
|
||||
errln("UScript.hasScript(U+0640, ...) is wrong");
|
||||
}
|
||||
if(!(
|
||||
UScript.hasScript(0x0650, UScript.INHERITED) && /* main Script value */
|
||||
|
@ -354,7 +354,7 @@ public class TestUScript extends TestFmwk {
|
|||
UScript.hasScript(0x0650, UScript.SYRIAC) &&
|
||||
!UScript.hasScript(0x0650, UScript.THAANA))
|
||||
) {
|
||||
errln("UScript.hasScript(U+0650, ...) is wrong\n");
|
||||
errln("UScript.hasScript(U+0650, ...) is wrong");
|
||||
}
|
||||
if(!(
|
||||
UScript.hasScript(0x0660, UScript.COMMON) && /* main Script value */
|
||||
|
@ -362,7 +362,7 @@ public class TestUScript extends TestFmwk {
|
|||
!UScript.hasScript(0x0660, UScript.SYRIAC) &&
|
||||
UScript.hasScript(0x0660, UScript.THAANA))
|
||||
) {
|
||||
errln("UScript.hasScript(U+0660, ...) is wrong\n");
|
||||
errln("UScript.hasScript(U+0660, ...) is wrong");
|
||||
}
|
||||
if(!(
|
||||
!UScript.hasScript(0xfdf2, UScript.COMMON) &&
|
||||
|
@ -370,28 +370,43 @@ public class TestUScript extends TestFmwk {
|
|||
!UScript.hasScript(0xfdf2, UScript.SYRIAC) &&
|
||||
UScript.hasScript(0xfdf2, UScript.THAANA))
|
||||
) {
|
||||
errln("UScript.hasScript(U+FDF2, ...) is wrong\n");
|
||||
errln("UScript.hasScript(U+FDF2, ...) is wrong");
|
||||
}
|
||||
if(UScript.hasScript(0x0640, 0xaffe)) {
|
||||
// An unguarded implementation might go into an infinite loop.
|
||||
errln("UScript.hasScript(U+0640, bogus 0xaffe) is wrong");
|
||||
}
|
||||
}
|
||||
|
||||
public void TestGetScriptExtensions() {
|
||||
BitSet scripts=new BitSet(UScript.CODE_LIMIT);
|
||||
|
||||
/* normal usage */
|
||||
if(!UScript.getScriptExtensions(0x063f, scripts).isEmpty()) {
|
||||
errln("UScript.getScriptExtensions(U+063F) is not empty");
|
||||
/* invalid code points */
|
||||
if(UScript.getScriptExtensions(-1, scripts)!=UScript.UNKNOWN || scripts.cardinality()!=1 ||
|
||||
!scripts.get(UScript.UNKNOWN)) {
|
||||
errln("UScript.getScriptExtensions(-1) is not {UNKNOWN}");
|
||||
}
|
||||
if(UScript.getScriptExtensions(0x0640, scripts).cardinality()!=3 ||
|
||||
if(UScript.getScriptExtensions(0x110000, scripts)!=UScript.UNKNOWN || scripts.cardinality()!=1 ||
|
||||
!scripts.get(UScript.UNKNOWN)) {
|
||||
errln("UScript.getScriptExtensions(0x110000) is not {UNKNOWN}");
|
||||
}
|
||||
|
||||
/* normal usage */
|
||||
if(UScript.getScriptExtensions(0x063f, scripts)!=UScript.ARABIC || scripts.cardinality()!=1 ||
|
||||
!scripts.get(UScript.ARABIC)) {
|
||||
errln("UScript.getScriptExtensions(U+063F) is not {ARABIC}");
|
||||
}
|
||||
if(UScript.getScriptExtensions(0x0640, scripts)!=-3 || scripts.cardinality()!=3 ||
|
||||
!scripts.get(UScript.ARABIC) || !scripts.get(UScript.SYRIAC) || !scripts.get(UScript.MANDAIC)
|
||||
) {
|
||||
errln("UScript.getScriptExtensions(U+0640) failed");
|
||||
}
|
||||
UScript.getScriptExtensions(0xfdf2, scripts);
|
||||
if(scripts.cardinality()!=2 || !scripts.get(UScript.ARABIC) || !scripts.get(UScript.THAANA)) {
|
||||
if(UScript.getScriptExtensions(0xfdf2, scripts)!=-2 || scripts.cardinality()!=2 ||
|
||||
!scripts.get(UScript.ARABIC) || !scripts.get(UScript.THAANA)) {
|
||||
errln("UScript.getScriptExtensions(U+FDF2) failed");
|
||||
}
|
||||
UScript.getScriptExtensions(0xff65, scripts);
|
||||
if(scripts.cardinality()!=6 || !scripts.get(UScript.BOPOMOFO) || !scripts.get(UScript.YI)) {
|
||||
if(UScript.getScriptExtensions(0xff65, scripts)!=-6 || scripts.cardinality()!=6 ||
|
||||
!scripts.get(UScript.BOPOMOFO) || !scripts.get(UScript.YI)) {
|
||||
errln("UScript.getScriptExtensions(U+FF65) failed");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2011, International Business Machines Corporation and *
|
||||
* Copyright (C) 2002-2012, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -452,15 +452,10 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
|
|||
}
|
||||
|
||||
static BitSet BITSET = new BitSet();
|
||||
/**
|
||||
* @param codePoint
|
||||
* @return
|
||||
*/
|
||||
public static synchronized String getStringScriptExtensions(int codePoint) {
|
||||
UScript.getScriptExtensions(codePoint, BITSET);
|
||||
if (BITSET.cardinality() == 0) {
|
||||
int scriptCode = UScript.getScript(codePoint);
|
||||
return UScript.getName(scriptCode);
|
||||
int result = UScript.getScriptExtensions(codePoint, BITSET);
|
||||
if (result >= 0) {
|
||||
return UScript.getName(result);
|
||||
}
|
||||
TreeMap<String,String> sorted = new TreeMap<String,String>();
|
||||
for (int scriptCode = BITSET.nextSetBit(0); scriptCode >= 0; scriptCode = BITSET.nextSetBit(scriptCode+1)) {
|
||||
|
|
Loading…
Add table
Reference in a new issue