mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-21655 widen Java character APIs from char to int arguments
This commit is contained in:
parent
280f0f2a25
commit
a36f06eaae
22 changed files with 241 additions and 162 deletions
|
@ -1551,6 +1551,22 @@ itself public can be placed in different places:
|
|||
4. If it is used by multiple packages, make it public and place the class in
|
||||
the `com.ibm.icu.impl` package.
|
||||
|
||||
### ICU4J API Stability
|
||||
|
||||
General discussion: See [ICU Design / ICU API compatibility](../icu/design.md#icu-api-compatibility).
|
||||
|
||||
Occasionally, we “broaden” or “widen” a Java API by making a parameter broader
|
||||
(e.g., `char` (code unit) to `int` (code point), or `String` to `CharSequence`)
|
||||
or a return type narrower (e.g., `Object` to `UnicodeSet`).
|
||||
|
||||
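Such a change is source-compatible but not binary-compatible: already-compiled callers still reference the old method signature and fail to link against the new one.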
|
||||
Before we do this, we need to check with users like Android whether this is OK.
|
||||
For example, in a class that Android exposes via its SDK,
|
||||
Android may need to retain hidden compatibility overloads with the old input types.
|
||||
|
||||
In addition, we should test with code using both the old and new types,
|
||||
so that if someone has such compatibility overloads they all get exercised.
|
||||
|
||||
### Error Handling and Exceptions
|
||||
|
||||
Errors should be indicated by throwing exceptions, not by returning “bogus”
|
||||
|
|
|
@ -326,7 +326,7 @@ class CharsetASCII extends CharsetICU {
|
|||
* if the character is a lead surrogate, we need to call encodeTrail to attempt to match
|
||||
* it up with a trail surrogate. if not, the character is unmappable.
|
||||
*/
|
||||
return (UTF16.isSurrogate((char) ch))
|
||||
return (UTF16.isSurrogate(ch))
|
||||
? encodeTrail(source, (char) ch, flush)
|
||||
: CoderResult.unmappableForLength(1);
|
||||
}
|
||||
|
|
|
@ -473,7 +473,7 @@ class CharsetBOCU1 extends CharsetICU {
|
|||
if(UTF16.isTrailSurrogate(trail)){
|
||||
source.position(source.position()+1);
|
||||
++nextSourceIndex;
|
||||
c=UCharacter.getCodePoint((char)c, trail);
|
||||
c=UCharacter.getCodePoint(c, trail);
|
||||
}
|
||||
} else {
|
||||
/*no more input*/
|
||||
|
@ -518,7 +518,7 @@ class CharsetBOCU1 extends CharsetICU {
|
|||
continue;
|
||||
}
|
||||
|
||||
if(UTF16.isLeadSurrogate((char)c)){
|
||||
if(UTF16.isLeadSurrogate(c)){
|
||||
getTrail(source, target, offsets);
|
||||
if(checkNegative){
|
||||
break;
|
||||
|
|
|
@ -375,8 +375,8 @@ class CharsetCompoundText extends CharsetICU {
|
|||
tmpTargetBuffer.limit(3);
|
||||
|
||||
/* check if the char is a First surrogate */
|
||||
if (UTF16.isSurrogate((char)sourceChar) || gotoGetTrail) {
|
||||
if (UTF16.isLeadSurrogate((char)sourceChar) || gotoGetTrail) {
|
||||
if (UTF16.isSurrogate(sourceChar) || gotoGetTrail) {
|
||||
if (UTF16.isLeadSurrogate(sourceChar) || gotoGetTrail) {
|
||||
// getTrail label
|
||||
/* reset gotoGetTrail flag*/
|
||||
gotoGetTrail = false;
|
||||
|
@ -388,7 +388,7 @@ class CharsetCompoundText extends CharsetICU {
|
|||
source.position(source.position()-1);
|
||||
if (UTF16.isTrailSurrogate(trail)) {
|
||||
source.get();
|
||||
sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);
|
||||
sourceChar = UCharacter.getCodePoint(sourceChar, trail);
|
||||
fromUChar32 = 0x00;
|
||||
/* convert this supplementary code point */
|
||||
/* exit this condition tree */
|
||||
|
|
|
@ -342,7 +342,7 @@ class CharsetHZ extends CharsetICU {
|
|||
/* Handle surrogates */
|
||||
/* check if the char is a First surrogate */
|
||||
|
||||
if (UTF16.isSurrogate((char) mySourceChar)) {
|
||||
if (UTF16.isSurrogate(mySourceChar)) {
|
||||
// use that handy handleSurrogates method everyone's been talking about!
|
||||
CoderResult cr = handleSurrogates(source, (char) mySourceChar);
|
||||
return (cr != null) ? cr : CoderResult.unmappableForLength(2);
|
||||
|
|
|
@ -1364,7 +1364,7 @@ class CharsetISCII extends CharsetICU {
|
|||
if (cr.isOverflow()) {
|
||||
break;
|
||||
}
|
||||
} else if (UTF16.isSurrogate((char)sourceChar)) {
|
||||
} else if (UTF16.isSurrogate(sourceChar)) {
|
||||
cr = handleSurrogates(source, (char) sourceChar);
|
||||
return (cr != null) ? cr : CoderResult.unmappableForLength(2);
|
||||
} else {
|
||||
|
|
|
@ -1759,8 +1759,8 @@ class CharsetISO2022 extends CharsetICU {
|
|||
sourceChar = source.get();
|
||||
}
|
||||
/* check if the char is a First surrogate */
|
||||
if (getTrail || UTF16.isSurrogate((char)sourceChar)) {
|
||||
if (getTrail || UTF16.isLeadSurrogate((char)sourceChar)) {
|
||||
if (getTrail || UTF16.isSurrogate(sourceChar)) {
|
||||
if (getTrail || UTF16.isLeadSurrogate(sourceChar)) {
|
||||
// getTrail:
|
||||
if (getTrail) {
|
||||
getTrail = false;
|
||||
|
@ -1773,7 +1773,7 @@ class CharsetISO2022 extends CharsetICU {
|
|||
source.position(source.position()-1);
|
||||
if (UTF16.isTrailSurrogate(trail)) {
|
||||
source.get();
|
||||
sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);
|
||||
sourceChar = UCharacter.getCodePoint(sourceChar, trail);
|
||||
fromUChar32 = 0x00;
|
||||
/* convert this supplementary code point */
|
||||
/* exit this condition tree */
|
||||
|
@ -2267,8 +2267,8 @@ class CharsetISO2022 extends CharsetICU {
|
|||
sourceChar = source.get();
|
||||
}
|
||||
/* check if the char is a First surrogate */
|
||||
if (UTF16.isSurrogate((char)sourceChar) || gotoGetTrail) {
|
||||
if (UTF16.isLeadSurrogate((char)sourceChar) || gotoGetTrail) {
|
||||
if (UTF16.isSurrogate(sourceChar) || gotoGetTrail) {
|
||||
if (UTF16.isLeadSurrogate(sourceChar) || gotoGetTrail) {
|
||||
// getTrail label
|
||||
/* reset gotoGetTrail flag*/
|
||||
gotoGetTrail = false;
|
||||
|
@ -2280,7 +2280,7 @@ class CharsetISO2022 extends CharsetICU {
|
|||
source.position(source.position()-1);
|
||||
if (UTF16.isTrailSurrogate(trail)) {
|
||||
source.get();
|
||||
sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);
|
||||
sourceChar = UCharacter.getCodePoint(sourceChar, trail);
|
||||
fromUChar32 = 0x00;
|
||||
/* convert this supplementary code point */
|
||||
/* exit this condition tree */
|
||||
|
@ -2767,8 +2767,8 @@ class CharsetISO2022 extends CharsetICU {
|
|||
*/
|
||||
|
||||
/* check if the char is a First surrogate */
|
||||
if (gotoGetTrail || UTF16.isSurrogate((char)sourceChar)) {
|
||||
if (gotoGetTrail || UTF16.isLeadSurrogate((char)sourceChar)) {
|
||||
if (gotoGetTrail || UTF16.isSurrogate(sourceChar)) {
|
||||
if (gotoGetTrail || UTF16.isLeadSurrogate(sourceChar)) {
|
||||
// getTrail label
|
||||
// reset gotoGetTrail flag
|
||||
gotoGetTrail = false;
|
||||
|
@ -2780,7 +2780,7 @@ class CharsetISO2022 extends CharsetICU {
|
|||
source.position(source.position()-1);
|
||||
if (UTF16.isTrailSurrogate(trail)) {
|
||||
source.get();
|
||||
sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);
|
||||
sourceChar = UCharacter.getCodePoint(sourceChar, trail);
|
||||
err = CoderResult.unmappableForLength(2);
|
||||
/* convert this surrogate code point */
|
||||
/* exit this condition tree */
|
||||
|
|
|
@ -2946,7 +2946,7 @@ class CharsetMBCS extends CharsetICU {
|
|||
boolean doloop = true;
|
||||
boolean doread = true;
|
||||
if (c != 0 && target.hasRemaining()) {
|
||||
if (UTF16.isLeadSurrogate((char) c) && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
|
||||
if (UTF16.isLeadSurrogate(c) && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
|
||||
// c is a lead surrogate, read another input
|
||||
SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex,
|
||||
prevSourceIndex, prevLength);
|
||||
|
@ -2989,9 +2989,9 @@ class CharsetMBCS extends CharsetICU {
|
|||
* are not paired but mapped separately. Note that in this case unmatched surrogates are
|
||||
* not detected.
|
||||
*/
|
||||
if (UTF16.isSurrogate((char) c)
|
||||
if (UTF16.isSurrogate(c)
|
||||
&& (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
|
||||
if (UTF16.isLeadSurrogate((char) c)) {
|
||||
if (UTF16.isLeadSurrogate(c)) {
|
||||
// getTrail:
|
||||
SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex,
|
||||
nextSourceIndex, prevSourceIndex, prevLength);
|
||||
|
@ -4064,9 +4064,9 @@ class CharsetMBCS extends CharsetICU {
|
|||
/* normal end of conversion: prepare for a new character */
|
||||
c = 0;
|
||||
continue;
|
||||
} else if (!UTF16.isSurrogate((char) c)) {
|
||||
} else if (!UTF16.isSurrogate(c)) {
|
||||
/* normal, unassigned BMP character */
|
||||
} else if (UTF16.isLeadSurrogate((char) c)) {
|
||||
} else if (UTF16.isLeadSurrogate(c)) {
|
||||
// getTrail:
|
||||
SideEffectsSingleBMP x = new SideEffectsSingleBMP(c, sourceArrayIndex);
|
||||
doloop = getTrailSingleBMP(source, x, cr);
|
||||
|
@ -4195,7 +4195,7 @@ class CharsetMBCS extends CharsetICU {
|
|||
boolean doloop = true;
|
||||
boolean doread = true;
|
||||
if (c != 0 && target.hasRemaining()) {
|
||||
if (UTF16.isLeadSurrogate((char) c)) {
|
||||
if (UTF16.isLeadSurrogate(c)) {
|
||||
SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex);
|
||||
doloop = getTrailDouble(source, target, uniMask, x, flush, cr);
|
||||
doread = x.doread;
|
||||
|
@ -4225,8 +4225,8 @@ class CharsetMBCS extends CharsetICU {
|
|||
if (doread) {
|
||||
c = source.get(sourceArrayIndex++);
|
||||
++nextSourceIndex;
|
||||
if (UTF16.isSurrogate((char) c)) {
|
||||
if (UTF16.isLeadSurrogate((char) c)) {
|
||||
if (UTF16.isSurrogate(c)) {
|
||||
if (UTF16.isLeadSurrogate(c)) {
|
||||
// getTrail:
|
||||
SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,
|
||||
nextSourceIndex);
|
||||
|
@ -4340,7 +4340,7 @@ class CharsetMBCS extends CharsetICU {
|
|||
boolean doloop = true;
|
||||
boolean doread = true;
|
||||
if (c != 0 && target.hasRemaining()) {
|
||||
if (UTF16.isLeadSurrogate((char) c)) {
|
||||
if (UTF16.isLeadSurrogate(c)) {
|
||||
SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex);
|
||||
doloop = getTrailDouble(source, target, uniMask, x, flush, cr);
|
||||
doread = x.doread;
|
||||
|
@ -4374,8 +4374,8 @@ class CharsetMBCS extends CharsetICU {
|
|||
* not paired but mapped separately. Note that in this case unmatched surrogates are not
|
||||
* detected.
|
||||
*/
|
||||
if (UTF16.isSurrogate((char) c) && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
|
||||
if (UTF16.isLeadSurrogate((char) c)) {
|
||||
if (UTF16.isSurrogate(c) && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
|
||||
if (UTF16.isLeadSurrogate(c)) {
|
||||
// getTrail:
|
||||
SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,
|
||||
nextSourceIndex);
|
||||
|
@ -4504,7 +4504,7 @@ class CharsetMBCS extends CharsetICU {
|
|||
char trail = source.get(x.sourceArrayIndex);
|
||||
if (UTF16.isTrailSurrogate(trail)) {
|
||||
++x.sourceArrayIndex;
|
||||
x.c = UCharacter.getCodePoint((char) x.c, trail);
|
||||
x.c = UCharacter.getCodePoint(x.c, trail);
|
||||
/* this codepage does not map supplementary code points */
|
||||
/* callback(unassigned) */
|
||||
cr[0] = CoderResult.unmappableForLength(2);
|
||||
|
@ -4548,7 +4548,7 @@ class CharsetMBCS extends CharsetICU {
|
|||
++x.sourceArrayIndex;
|
||||
++x.nextSourceIndex;
|
||||
/* convert this supplementary code point */
|
||||
x.c = UCharacter.getCodePoint((char) x.c, trail);
|
||||
x.c = UCharacter.getCodePoint(x.c, trail);
|
||||
if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
|
||||
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
|
||||
fromUnicodeStatus = x.prevLength; /* save the old state */
|
||||
|
@ -4622,7 +4622,7 @@ class CharsetMBCS extends CharsetICU {
|
|||
++x.sourceArrayIndex;
|
||||
++x.nextSourceIndex;
|
||||
/* convert this supplementary code point */
|
||||
x.c = UCharacter.getCodePoint((char) x.c, trail);
|
||||
x.c = UCharacter.getCodePoint(x.c, trail);
|
||||
if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
|
||||
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
|
||||
/* callback(unassigned) */
|
||||
|
|
|
@ -813,9 +813,9 @@ class CharsetSCSU extends CharsetICU{
|
|||
offsets.put(sourceIndex);
|
||||
}
|
||||
--targetCapacity;
|
||||
} else if(AfterGetTrail || UTF16.isSurrogate((char)c)){
|
||||
} else if(AfterGetTrail || UTF16.isSurrogate(c)){
|
||||
if(!AfterGetTrail){
|
||||
if(UTF16.isLeadSurrogate((char)c)){
|
||||
if(UTF16.isLeadSurrogate(c)){
|
||||
label = getTrail(source, target, offsets);
|
||||
if(label==EndLoop){
|
||||
return label;
|
||||
|
@ -1058,7 +1058,7 @@ class CharsetSCSU extends CharsetICU{
|
|||
if(UTF16.isTrailSurrogate(trail)){
|
||||
source.position(source.position()+1);
|
||||
++nextSourceIndex;
|
||||
c = UCharacter.getCodePoint((char)c, trail);
|
||||
c = UCharacter.getCodePoint(c, trail);
|
||||
label = Loop;
|
||||
} else {
|
||||
/*this is unmatched lead code unit (1st Surrogate)*/
|
||||
|
@ -1078,7 +1078,7 @@ class CharsetSCSU extends CharsetICU{
|
|||
int label = EndLoop;
|
||||
AfterGetTrailUnicode = true;
|
||||
/*c is surrogate*/
|
||||
if(UTF16.isLeadSurrogate((char)c)){
|
||||
if(UTF16.isLeadSurrogate(c)){
|
||||
// getTrailUnicode:
|
||||
lead = (char)c;
|
||||
if(source.hasRemaining()){
|
||||
|
@ -1087,7 +1087,7 @@ class CharsetSCSU extends CharsetICU{
|
|||
if(UTF16.isTrailSurrogate(trail)){
|
||||
source.get();
|
||||
++nextSourceIndex;
|
||||
c = UCharacter.getCodePoint((char)c, trail);
|
||||
c = UCharacter.getCodePoint(c, trail);
|
||||
/*convert this surrogate code point*/
|
||||
/*exit this condition tree*/
|
||||
} else {
|
||||
|
|
|
@ -397,7 +397,7 @@ class CharsetUTF8 extends CharsetICU {
|
|||
}
|
||||
targetArray[tgtIdx++] = encodeLastTail(char32);
|
||||
|
||||
} else if (!UTF16.isSurrogate((char) char32) || isCESU8) {
|
||||
} else if (!UTF16.isSurrogate(char32) || isCESU8) {
|
||||
/* 3 bytes to encode from char32 */
|
||||
|
||||
targetArray[tgtIdx++] = encodeHeadOf3(char32);
|
||||
|
@ -481,7 +481,7 @@ class CharsetUTF8 extends CharsetICU {
|
|||
}
|
||||
target.put(encodeLastTail(char32));
|
||||
|
||||
} else if (!UTF16.isSurrogate((char) char32) || isCESU8) {
|
||||
} else if (!UTF16.isSurrogate(char32) || isCESU8) {
|
||||
/* 3 bytes to encode from char32 */
|
||||
|
||||
target.put(encodeHeadOf3(char32));
|
||||
|
|
|
@ -33,30 +33,30 @@ public final class CharacterIteration {
|
|||
// which leaves it in position for underlying iterator's next() to work.
|
||||
int c = ci.current();
|
||||
if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE && c<=UTF16.LEAD_SURROGATE_MAX_VALUE) {
|
||||
c = ci.next();
|
||||
c = ci.next();
|
||||
if (c<UTF16.TRAIL_SURROGATE_MIN_VALUE || c>UTF16.TRAIL_SURROGATE_MAX_VALUE) {
|
||||
ci.previous();
|
||||
ci.previous();
|
||||
}
|
||||
}
|
||||
|
||||
// For BMP chars, this next() is the real deal.
|
||||
c = ci.next();
|
||||
|
||||
// If we might have a lead surrogate, we need to peek ahead to get the trail
|
||||
|
||||
// If we might have a lead surrogate, we need to peek ahead to get the trail
|
||||
// even though we don't want to really be positioned there.
|
||||
if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
|
||||
c = nextTrail32(ci, c);
|
||||
c = nextTrail32(ci, c);
|
||||
}
|
||||
|
||||
|
||||
if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c != DONE32) {
|
||||
// We got a supplementary char. Back the iterator up to the position
|
||||
// of the lead surrogate.
|
||||
ci.previous();
|
||||
ci.previous();
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Out-of-line portion of the in-line Next32 code.
|
||||
// The call site does an initial ci.next() and calls this function
|
||||
// if the 16 bit value it gets is >= LEAD_SURROGATE_MIN_VALUE.
|
||||
|
@ -81,36 +81,36 @@ public final class CharacterIteration {
|
|||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
|
||||
public static int previous32(CharacterIterator ci) {
|
||||
if (ci.getIndex() <= ci.getBeginIndex()) {
|
||||
return DONE32;
|
||||
return DONE32;
|
||||
}
|
||||
char trail = ci.previous();
|
||||
int retVal = trail;
|
||||
if (UTF16.isTrailSurrogate(trail) && ci.getIndex()>ci.getBeginIndex()) {
|
||||
char lead = ci.previous();
|
||||
if (UTF16.isLeadSurrogate(lead)) {
|
||||
retVal = (((int)lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
|
||||
((int)trail - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
|
||||
retVal = ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
|
||||
(trail - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
|
||||
UTF16.SUPPLEMENTARY_MIN_VALUE;
|
||||
} else {
|
||||
ci.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
|
||||
public static int current32(CharacterIterator ci) {
|
||||
char lead = ci.current();
|
||||
int retVal = lead;
|
||||
if (retVal < UTF16.LEAD_SURROGATE_MIN_VALUE) {
|
||||
return retVal;
|
||||
return retVal;
|
||||
}
|
||||
if (UTF16.isLeadSurrogate(lead)) {
|
||||
int trail = (int)ci.next();
|
||||
int trail = ci.next();
|
||||
ci.previous();
|
||||
if (UTF16.isTrailSurrogate((char)trail)) {
|
||||
if (UTF16.isTrailSurrogate(trail)) {
|
||||
retVal = ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
|
||||
(trail - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
|
||||
UTF16.SUPPLEMENTARY_MIN_VALUE;
|
||||
|
@ -118,7 +118,7 @@ public final class CharacterIteration {
|
|||
} else {
|
||||
if (lead == CharacterIterator.DONE) {
|
||||
if (ci.getIndex() >= ci.getEndIndex()) {
|
||||
retVal = DONE32;
|
||||
retVal = DONE32;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -102,7 +102,7 @@ public class ReplaceableUCharacterIterator extends UCharacterIterator {
|
|||
// trail surrogate, check for surrogates
|
||||
|
||||
int ch = current();
|
||||
if(UTF16.isLeadSurrogate((char)ch)){
|
||||
if(UTF16.isLeadSurrogate(ch)){
|
||||
// advance the index to get the next code point
|
||||
next();
|
||||
// due to post increment semantics current() after next()
|
||||
|
@ -111,7 +111,7 @@ public class ReplaceableUCharacterIterator extends UCharacterIterator {
|
|||
// current should never change the current index so back off
|
||||
previous();
|
||||
|
||||
if(UTF16.isTrailSurrogate((char)ch2)){
|
||||
if(UTF16.isTrailSurrogate(ch2)){
|
||||
// we found a surrogate pair
|
||||
return Character.toCodePoint((char)ch, (char)ch2);
|
||||
}
|
||||
|
|
|
@ -865,7 +865,7 @@ public final class Utility {
|
|||
// if there is a trail surrogate after it, either as an
|
||||
// escape or as a literal. If so, join them up into a
|
||||
// supplementary.
|
||||
if (offset < length && result <= 0xffff && UTF16.isLeadSurrogate((char) result)) {
|
||||
if (offset < length && UTF16.isLeadSurrogate(result)) {
|
||||
int ahead = offset+1;
|
||||
c = s.charAt(offset); // [sic] get 16-bit code unit
|
||||
if (c == '\\' && ahead < length) {
|
||||
|
@ -875,7 +875,7 @@ public final class Utility {
|
|||
ahead += cpAndLength & 0xff;
|
||||
}
|
||||
}
|
||||
if (c <= 0xffff && UTF16.isTrailSurrogate((char) c)) {
|
||||
if (UTF16.isTrailSurrogate(c)) {
|
||||
offset = ahead;
|
||||
result = Character.toCodePoint((char) result, (char) c);
|
||||
}
|
||||
|
|
|
@ -160,6 +160,28 @@ import com.ibm.icu.util.VersionInfo;
|
|||
|
||||
public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||
{
|
||||
/**
|
||||
* Lead surrogate bitmask
|
||||
*/
|
||||
private static final int LEAD_SURROGATE_BITMASK = 0xFFFFFC00;
|
||||
|
||||
/**
|
||||
* Trail surrogate bitmask
|
||||
*/
|
||||
private static final int TRAIL_SURROGATE_BITMASK = 0xFFFFFC00;
|
||||
|
||||
/**
|
||||
* Lead surrogate bits
|
||||
*/
|
||||
private static final int LEAD_SURROGATE_BITS = 0xD800;
|
||||
|
||||
/**
|
||||
* Trail surrogate bits
|
||||
*/
|
||||
private static final int TRAIL_SURROGATE_BITS = 0xDC00;
|
||||
|
||||
private static final int U16_SURROGATE_OFFSET = ((0xd800 << 10) + 0xdc00 - 0x10000);
|
||||
|
||||
// public inner classes ----------------------------------------------
|
||||
|
||||
/**
|
||||
|
@ -5266,19 +5288,21 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
/**
|
||||
* {@icu} Returns a code point corresponding to the two surrogate code units.
|
||||
*
|
||||
* @param lead the lead char
|
||||
* @param trail the trail char
|
||||
* @return code point if surrogate characters are valid.
|
||||
* @param lead the lead unit
|
||||
* (In ICU 2.1-69 the type of both parameters was <code>char</code>.)
|
||||
* @param trail the trail unit
|
||||
* @return code point if lead and trail form a valid surrogate pair.
|
||||
* @exception IllegalArgumentException thrown when the code units do
|
||||
* not form a valid code point
|
||||
* @stable ICU 2.1
|
||||
* not form a valid surrogate pair
|
||||
* @stable ICU 70
|
||||
* @see #toCodePoint(int, int)
|
||||
*/
|
||||
public static int getCodePoint(char lead, char trail)
|
||||
public static int getCodePoint(int lead, int trail)
|
||||
{
|
||||
if (Character.isSurrogatePair(lead, trail)) {
|
||||
return Character.toCodePoint(lead, trail);
|
||||
if (isHighSurrogate(lead) && isLowSurrogate(trail)) {
|
||||
return toCodePoint(lead, trail);
|
||||
}
|
||||
throw new IllegalArgumentException("Illegal surrogate characters");
|
||||
throw new IllegalArgumentException("Not a valid surrogate pair");
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -6180,37 +6204,43 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
}
|
||||
|
||||
/**
|
||||
* Same as {@link Character#isHighSurrogate}.
|
||||
* Same as {@link Character#isHighSurrogate},
|
||||
* except that the ICU version accepts <code>int</code> for code points.
|
||||
*
|
||||
* @param ch the char to check
|
||||
* @return true if ch is a high (lead) surrogate
|
||||
* @stable ICU 3.0
|
||||
* @param codePoint the code point to check
|
||||
* (In ICU 3.0-69 the type of this parameter was <code>char</code>.)
|
||||
* @return true if codePoint is a high (lead) surrogate
|
||||
* @stable ICU 70
|
||||
*/
|
||||
public static boolean isHighSurrogate(char ch) {
|
||||
return Character.isHighSurrogate(ch);
|
||||
public static boolean isHighSurrogate(int codePoint) {
|
||||
return (codePoint & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as {@link Character#isLowSurrogate}.
|
||||
* Same as {@link Character#isLowSurrogate},
|
||||
* except that the ICU version accepts <code>int</code> for code points.
|
||||
*
|
||||
* @param ch the char to check
|
||||
* @return true if ch is a low (trail) surrogate
|
||||
* @stable ICU 3.0
|
||||
* @param codePoint the code point to check
|
||||
* (In ICU 3.0-69 the type of this parameter was <code>char</code>.)
|
||||
* @return true if codePoint is a low (trail) surrogate
|
||||
* @stable ICU 70
|
||||
*/
|
||||
public static boolean isLowSurrogate(char ch) {
|
||||
return Character.isLowSurrogate(ch);
|
||||
public static boolean isLowSurrogate(int codePoint) {
|
||||
return (codePoint & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as {@link Character#isSurrogatePair}.
|
||||
* Same as {@link Character#isSurrogatePair},
|
||||
* except that the ICU version accepts <code>int</code> for code points.
|
||||
*
|
||||
* @param high the high (lead) char
|
||||
* @param low the low (trail) char
|
||||
* @param high the high (lead) unit
|
||||
* (In ICU 3.0-69 the type of both parameters was <code>char</code>.)
|
||||
* @param low the low (trail) unit
|
||||
* @return true if high, low form a surrogate pair
|
||||
* @stable ICU 3.0
|
||||
* @stable ICU 70
|
||||
*/
|
||||
public static final boolean isSurrogatePair(char high, char low) {
|
||||
return Character.isSurrogatePair(high, low);
|
||||
public static final boolean isSurrogatePair(int high, int low) {
|
||||
return isHighSurrogate(high) && isLowSurrogate(low);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -6227,17 +6257,21 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
}
|
||||
|
||||
/**
|
||||
* Same as {@link Character#toCodePoint}.
|
||||
* Same as {@link Character#toCodePoint},
|
||||
* except that the ICU version accepts <code>int</code> for code points.
|
||||
* Returns the code point represented by the two surrogate code units.
|
||||
* This does not check the surrogate pair for validity.
|
||||
*
|
||||
* @param high the high (lead) surrogate
|
||||
* (In ICU 3.0-69 the type of both parameters was <code>char</code>.)
|
||||
* @param low the low (trail) surrogate
|
||||
* @return the code point formed by the surrogate pair
|
||||
* @stable ICU 3.0
|
||||
* @stable ICU 70
|
||||
* @see #getCodePoint(int, int)
|
||||
*/
|
||||
public static final int toCodePoint(char high, char low) {
|
||||
return Character.toCodePoint(high, low);
|
||||
public static final int toCodePoint(int high, int low) {
|
||||
// see ICU4C U16_GET_SUPPLEMENTARY()
|
||||
return (high << 10) + low - U16_SURROGATE_OFFSET;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -2237,7 +2237,7 @@ public final class Normalizer implements Cloneable {
|
|||
|
||||
/* get complete code points for c1, c2 for lookups if either is a surrogate */
|
||||
cp1=c1;
|
||||
if(UTF16.isSurrogate((char)c1)) {
|
||||
if(UTF16.isSurrogate(c1)) {
|
||||
char c;
|
||||
|
||||
if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
|
||||
|
@ -2253,7 +2253,7 @@ public final class Normalizer implements Cloneable {
|
|||
}
|
||||
|
||||
cp2=c2;
|
||||
if(UTF16.isSurrogate((char)c2)) {
|
||||
if(UTF16.isSurrogate(c2)) {
|
||||
char c;
|
||||
|
||||
if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
|
||||
|
@ -2277,7 +2277,7 @@ public final class Normalizer implements Cloneable {
|
|||
(length=csp.toFullFolding(cp1, fold1, options))>=0
|
||||
) {
|
||||
/* cp1 case-folds to the code point "length" or to p[length] */
|
||||
if(UTF16.isSurrogate((char)c1)) {
|
||||
if(UTF16.isSurrogate(c1)) {
|
||||
if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
|
||||
/* advance beyond source surrogate pair if it case-folds */
|
||||
++s1;
|
||||
|
@ -2325,7 +2325,7 @@ public final class Normalizer implements Cloneable {
|
|||
(length=csp.toFullFolding(cp2, fold2, options))>=0
|
||||
) {
|
||||
/* cp2 case-folds to the code point "length" or to p[length] */
|
||||
if(UTF16.isSurrogate((char)c2)) {
|
||||
if(UTF16.isSurrogate(c2)) {
|
||||
if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
|
||||
/* advance beyond source surrogate pair if it case-folds */
|
||||
++s2;
|
||||
|
@ -2373,7 +2373,7 @@ public final class Normalizer implements Cloneable {
|
|||
(decomp1=nfcImpl.getDecomposition(cp1))!=null
|
||||
) {
|
||||
/* cp1 decomposes into p[length] */
|
||||
if(UTF16.isSurrogate((char)c1)) {
|
||||
if(UTF16.isSurrogate(c1)) {
|
||||
if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
|
||||
/* advance beyond source surrogate pair if it decomposes */
|
||||
++s1;
|
||||
|
@ -2417,7 +2417,7 @@ public final class Normalizer implements Cloneable {
|
|||
(decomp2=nfcImpl.getDecomposition(cp2))!=null
|
||||
) {
|
||||
/* cp2 decomposes into p[length] */
|
||||
if(UTF16.isSurrogate((char)c2)) {
|
||||
if(UTF16.isSurrogate(c2)) {
|
||||
if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
|
||||
/* advance beyond source surrogate pair if it decomposes */
|
||||
++s2;
|
||||
|
|
|
@ -153,7 +153,7 @@ public abstract class UCharacterIterator implements Cloneable, UForwardCharacter
|
|||
*/
|
||||
public int currentCodePoint() {
|
||||
int ch = current();
|
||||
if (UTF16.isLeadSurrogate((char) ch)) {
|
||||
if (UTF16.isLeadSurrogate(ch)) {
|
||||
// advance the index to get the
|
||||
// next code point
|
||||
next();
|
||||
|
@ -165,7 +165,7 @@ public abstract class UCharacterIterator implements Cloneable, UForwardCharacter
|
|||
// the current index so back off
|
||||
previous();
|
||||
|
||||
if (UTF16.isTrailSurrogate((char) ch2)) {
|
||||
if (UTF16.isTrailSurrogate(ch2)) {
|
||||
// we found a surrogate pair
|
||||
// return the codepoint
|
||||
return Character.toCodePoint((char) ch, (char) ch2);
|
||||
|
@ -211,9 +211,9 @@ public abstract class UCharacterIterator implements Cloneable, UForwardCharacter
|
|||
@Override
|
||||
public int nextCodePoint() {
|
||||
int ch1 = next();
|
||||
if (UTF16.isLeadSurrogate((char) ch1)) {
|
||||
if (UTF16.isLeadSurrogate(ch1)) {
|
||||
int ch2 = next();
|
||||
if (UTF16.isTrailSurrogate((char) ch2)) {
|
||||
if (UTF16.isTrailSurrogate(ch2)) {
|
||||
return Character.toCodePoint((char) ch1, (char) ch2);
|
||||
} else if (ch2 != DONE) {
|
||||
// unmatched surrogate so back out
|
||||
|
@ -243,9 +243,9 @@ public abstract class UCharacterIterator implements Cloneable, UForwardCharacter
|
|||
*/
|
||||
public int previousCodePoint() {
|
||||
int ch1 = previous();
|
||||
if (UTF16.isTrailSurrogate((char) ch1)) {
|
||||
if (UTF16.isTrailSurrogate(ch1)) {
|
||||
int ch2 = previous();
|
||||
if (UTF16.isLeadSurrogate((char) ch2)) {
|
||||
if (UTF16.isLeadSurrogate(ch2)) {
|
||||
return Character.toCodePoint((char) ch2, (char) ch1);
|
||||
} else if (ch2 != DONE) {
|
||||
// unmatched trail surrogate so back out
|
||||
|
|
|
@ -596,36 +596,39 @@ public final class UTF16 {
|
|||
}
|
||||
|
||||
/**
|
||||
* Determines whether the code value is a surrogate.
|
||||
* Determines whether the code point is a surrogate.
|
||||
*
|
||||
* @param char16 The input character.
|
||||
* @return true If the input character is a surrogate.
|
||||
* @stable ICU 2.1
|
||||
* @param codePoint The input character.
|
||||
* (In ICU 2.1-69 the type of this parameter was <code>char</code>.)
|
||||
* @return true If the input code point is a surrogate.
|
||||
* @stable ICU 70
|
||||
*/
|
||||
public static boolean isSurrogate(char char16) {
|
||||
return (char16 & SURROGATE_BITMASK) == SURROGATE_BITS;
|
||||
public static boolean isSurrogate(int codePoint) {
|
||||
return (codePoint & SURROGATE_BITMASK) == SURROGATE_BITS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the character is a trail surrogate.
|
||||
* Determines whether the code point is a trail surrogate.
|
||||
*
|
||||
* @param char16 The input character.
|
||||
* @return true If the input character is a trail surrogate.
|
||||
* @stable ICU 2.1
|
||||
* @param codePoint The input character.
|
||||
* (In ICU 2.1-69 the type of this parameter was <code>char</code>.)
|
||||
* @return true If the input code point is a trail surrogate.
|
||||
* @stable ICU 70
|
||||
*/
|
||||
public static boolean isTrailSurrogate(char char16) {
|
||||
return (char16 & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS;
|
||||
public static boolean isTrailSurrogate(int codePoint) {
|
||||
return (codePoint & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the character is a lead surrogate.
|
||||
* Determines whether the code point is a lead surrogate.
|
||||
*
|
||||
* @param char16 The input character.
|
||||
* @return true If the input character is a lead surrogate
|
||||
* @stable ICU 2.1
|
||||
* @param codePoint The input character.
|
||||
* (In ICU 2.1-69 the type of this parameter was <code>char</code>.)
|
||||
* @return true If the input code point is a lead surrogate
|
||||
* @stable ICU 70
|
||||
*/
|
||||
public static boolean isLeadSurrogate(char char16) {
|
||||
return (char16 & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS;
|
||||
public static boolean isLeadSurrogate(int codePoint) {
|
||||
return (codePoint & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1545,7 +1548,7 @@ public final class UTF16 {
|
|||
if (char32 < SUPPLEMENTARY_MIN_VALUE) {
|
||||
int result = source.indexOf((char) char32);
|
||||
if (result >= 0) {
|
||||
if (isLeadSurrogate((char) char32) && (result < source.length() - 1)
|
||||
if (isLeadSurrogate(char32) && (result < source.length() - 1)
|
||||
&& isTrailSurrogate(source.charAt(result + 1))) {
|
||||
return indexOf(source, char32, result + 1);
|
||||
}
|
||||
|
@ -1646,7 +1649,7 @@ public final class UTF16 {
|
|||
if (char32 < SUPPLEMENTARY_MIN_VALUE) {
|
||||
int result = source.indexOf((char) char32, fromIndex);
|
||||
if (result >= 0) {
|
||||
if (isLeadSurrogate((char) char32) && (result < source.length() - 1)
|
||||
if (isLeadSurrogate(char32) && (result < source.length() - 1)
|
||||
&& isTrailSurrogate(source.charAt(result + 1))) {
|
||||
return indexOf(source, char32, result + 1);
|
||||
}
|
||||
|
@ -1748,7 +1751,7 @@ public final class UTF16 {
|
|||
if (char32 < SUPPLEMENTARY_MIN_VALUE) {
|
||||
int result = source.lastIndexOf((char) char32);
|
||||
if (result >= 0) {
|
||||
if (isLeadSurrogate((char) char32) && (result < source.length() - 1)
|
||||
if (isLeadSurrogate(char32) && (result < source.length() - 1)
|
||||
&& isTrailSurrogate(source.charAt(result + 1))) {
|
||||
return lastIndexOf(source, char32, result - 1);
|
||||
}
|
||||
|
@ -1859,7 +1862,7 @@ public final class UTF16 {
|
|||
if (char32 < SUPPLEMENTARY_MIN_VALUE) {
|
||||
int result = source.lastIndexOf((char) char32, fromIndex);
|
||||
if (result >= 0) {
|
||||
if (isLeadSurrogate((char) char32) && (result < source.length() - 1)
|
||||
if (isLeadSurrogate(char32) && (result < source.length() - 1)
|
||||
&& isTrailSurrogate(source.charAt(result + 1))) {
|
||||
return lastIndexOf(source, char32, result - 1);
|
||||
}
|
||||
|
|
|
@ -544,7 +544,7 @@ public class TestCharset extends TestFmwk {
|
|||
bytes[x + 1] = (byte) (0x80 | ((i >> 6) & 0x3f));
|
||||
bytes[x + 2] = (byte) (0x80 | ((i >> 0) & 0x3f));
|
||||
chars[y] = (char) i;
|
||||
if (!UTF16.isSurrogate((char)i)) {
|
||||
if (!UTF16.isSurrogate(i)) {
|
||||
bs = ByteBuffer.wrap(bytes, x, 3).slice();
|
||||
us = CharBuffer.wrap(chars, y, 1).slice();
|
||||
try {
|
||||
|
|
|
@ -78,50 +78,58 @@ public final class UCharacterSurrogateTest extends TestFmwk {
|
|||
|
||||
@Test
|
||||
public void TestIsHighSurrogate() {
|
||||
if (UCharacter
|
||||
.isHighSurrogate((char) (UCharacter.MIN_HIGH_SURROGATE - 1)))
|
||||
// Test with both int & char values.
|
||||
if (UCharacter.isHighSurrogate(UCharacter.MIN_HIGH_SURROGATE - 1) ||
|
||||
UCharacter.isHighSurrogate((char) (UCharacter.MIN_HIGH_SURROGATE - 1)))
|
||||
errln("0xd7ff");
|
||||
if (!UCharacter.isHighSurrogate(UCharacter.MIN_HIGH_SURROGATE))
|
||||
errln("0xd800");
|
||||
if (!UCharacter.isHighSurrogate(UCharacter.MAX_HIGH_SURROGATE))
|
||||
errln("0xdbff");
|
||||
if (UCharacter
|
||||
.isHighSurrogate((char) (UCharacter.MAX_HIGH_SURROGATE + 1)))
|
||||
if (UCharacter.isHighSurrogate(UCharacter.MAX_HIGH_SURROGATE + 1) ||
|
||||
UCharacter.isHighSurrogate((char) (UCharacter.MAX_HIGH_SURROGATE + 1)))
|
||||
errln("0xdc00");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestIsLowSurrogate() {
|
||||
if (UCharacter
|
||||
.isLowSurrogate((char) (UCharacter.MIN_LOW_SURROGATE - 1)))
|
||||
// Test with both int & char values.
|
||||
if (UCharacter.isLowSurrogate(UCharacter.MIN_LOW_SURROGATE - 1) ||
|
||||
UCharacter.isLowSurrogate((char) (UCharacter.MIN_LOW_SURROGATE - 1)))
|
||||
errln("0xdbff");
|
||||
if (!UCharacter.isLowSurrogate(UCharacter.MIN_LOW_SURROGATE))
|
||||
errln("0xdc00");
|
||||
if (!UCharacter.isLowSurrogate(UCharacter.MAX_LOW_SURROGATE))
|
||||
errln("0xdfff");
|
||||
if (UCharacter
|
||||
.isLowSurrogate((char) (UCharacter.MAX_LOW_SURROGATE + 1)))
|
||||
if (UCharacter.isLowSurrogate(UCharacter.MAX_LOW_SURROGATE + 1) ||
|
||||
UCharacter.isLowSurrogate((char) (UCharacter.MAX_LOW_SURROGATE + 1)))
|
||||
errln("0xe000");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestIsSurrogatePair() {
|
||||
// Test with both int & char values.
|
||||
if (UCharacter.isSurrogatePair(
|
||||
(char) (UCharacter.MIN_HIGH_SURROGATE - 1),
|
||||
UCharacter.MIN_LOW_SURROGATE))
|
||||
UCharacter.MIN_HIGH_SURROGATE - 1, UCharacter.MIN_LOW_SURROGATE) ||
|
||||
UCharacter.isSurrogatePair(
|
||||
(char) (UCharacter.MIN_HIGH_SURROGATE - 1), UCharacter.MIN_LOW_SURROGATE))
|
||||
errln("0xd7ff,0xdc00");
|
||||
if (UCharacter.isSurrogatePair(
|
||||
(char) (UCharacter.MAX_HIGH_SURROGATE + 1),
|
||||
UCharacter.MIN_LOW_SURROGATE))
|
||||
UCharacter.MAX_HIGH_SURROGATE + 1, UCharacter.MIN_LOW_SURROGATE) ||
|
||||
UCharacter.isSurrogatePair(
|
||||
(char) (UCharacter.MAX_HIGH_SURROGATE + 1), UCharacter.MIN_LOW_SURROGATE))
|
||||
errln("0xd800,0xdc00");
|
||||
if (UCharacter.isSurrogatePair(UCharacter.MIN_HIGH_SURROGATE,
|
||||
(char) (UCharacter.MIN_LOW_SURROGATE - 1)))
|
||||
if (UCharacter.isSurrogatePair(
|
||||
UCharacter.MIN_HIGH_SURROGATE, UCharacter.MIN_LOW_SURROGATE - 1) ||
|
||||
UCharacter.isSurrogatePair(
|
||||
UCharacter.MIN_HIGH_SURROGATE, (char) (UCharacter.MIN_LOW_SURROGATE - 1)))
|
||||
errln("0xd800,0xdbff");
|
||||
if (UCharacter.isSurrogatePair(UCharacter.MIN_HIGH_SURROGATE,
|
||||
(char) (UCharacter.MAX_LOW_SURROGATE + 1)))
|
||||
if (UCharacter.isSurrogatePair(
|
||||
UCharacter.MIN_HIGH_SURROGATE, UCharacter.MAX_LOW_SURROGATE + 1) ||
|
||||
UCharacter.isSurrogatePair(
|
||||
UCharacter.MIN_HIGH_SURROGATE, (char) (UCharacter.MAX_LOW_SURROGATE + 1)))
|
||||
errln("0xd800,0xe000");
|
||||
if (!UCharacter.isSurrogatePair(UCharacter.MIN_HIGH_SURROGATE,
|
||||
UCharacter.MIN_LOW_SURROGATE))
|
||||
if (!UCharacter.isSurrogatePair(UCharacter.MIN_HIGH_SURROGATE, UCharacter.MIN_LOW_SURROGATE))
|
||||
errln("0xd800,0xdc00");
|
||||
}
|
||||
|
||||
|
@ -157,6 +165,9 @@ public final class UCharacterSurrogateTest extends TestFmwk {
|
|||
errln(Integer.toHexString(pairs[i]) + ", " + pairs[i + 1]);
|
||||
break;
|
||||
}
|
||||
// Also test with int values.
|
||||
int cp2 = UCharacter.toCodePoint(pairs[i], pairs[i + 1]);
|
||||
assertEquals("pairs at " + i, cp, cp2);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1526,8 +1526,14 @@ public final class UCharacterTest extends TestFmwk
|
|||
ch ++;
|
||||
}
|
||||
}
|
||||
try
|
||||
{
|
||||
// Test with both char & int values.
|
||||
try {
|
||||
UCharacter.getCodePoint(0xD7ff, 0xDC00);
|
||||
errln("Invalid surrogate characters should not form a " +
|
||||
"supplementary");
|
||||
} catch(Exception e) {
|
||||
}
|
||||
try {
|
||||
UCharacter.getCodePoint((char)0xD7ff, (char)0xDC00);
|
||||
errln("Invalid surrogate characters should not form a " +
|
||||
"supplementary");
|
||||
|
|
|
@ -480,26 +480,35 @@ public final class UTF16Test extends TestFmwk
|
|||
@Test
|
||||
public void TestGetCharCountSurrogate()
|
||||
{
|
||||
if (UTF16.getCharCount(0x61) != 1 ||
|
||||
UTF16.getCharCount(0x10000) != 2) {
|
||||
errln("FAIL getCharCount result failure");
|
||||
if (UTF16.getCharCount(0x61) != 1 || UTF16.getCharCount(0x10000) != 2) {
|
||||
errln("FAIL getCharCount result failure");
|
||||
}
|
||||
// ICU-21655 (ICU 70) widened the surrogate functions from char to int.
|
||||
// Test with both types, in case someone like Android retains binary-compatibility overloads.
|
||||
if (UTF16.getLeadSurrogate(0x61) != 0 ||
|
||||
UTF16.getTrailSurrogate(0x61) != 0x61 ||
|
||||
UTF16.isLeadSurrogate((char)0x61) ||
|
||||
UTF16.isTrailSurrogate((char)0x61) ||
|
||||
UTF16.getLeadSurrogate(0x10000) != 0xd800 ||
|
||||
UTF16.getTrailSurrogate(0x10000) != 0xdc00 ||
|
||||
UTF16.isLeadSurrogate((char)0xd800) != true ||
|
||||
UTF16.isTrailSurrogate((char)0xd800) ||
|
||||
UTF16.isLeadSurrogate((char)0xdc00) ||
|
||||
UTF16.isTrailSurrogate((char)0xdc00) != true) {
|
||||
errln("FAIL *Surrogate result failure");
|
||||
UTF16.getTrailSurrogate(0x61) != 0x61 ||
|
||||
UTF16.isLeadSurrogate((char)0x61) ||
|
||||
UTF16.isTrailSurrogate((char)0x61) ||
|
||||
UTF16.isLeadSurrogate(0x61) ||
|
||||
UTF16.isTrailSurrogate(0x61) ||
|
||||
UTF16.getLeadSurrogate(0x10000) != 0xd800 ||
|
||||
UTF16.getTrailSurrogate(0x10000) != 0xdc00 ||
|
||||
UTF16.isLeadSurrogate((char)0xd800) != true ||
|
||||
UTF16.isTrailSurrogate((char)0xd800) ||
|
||||
UTF16.isLeadSurrogate((char)0xdc00) ||
|
||||
UTF16.isTrailSurrogate((char)0xdc00) != true ||
|
||||
UTF16.isLeadSurrogate(0xd800) != true ||
|
||||
UTF16.isTrailSurrogate(0xd800) ||
|
||||
UTF16.isLeadSurrogate(0xdc00) ||
|
||||
UTF16.isTrailSurrogate(0xdc00) != true) {
|
||||
errln("FAIL *Surrogate result failure");
|
||||
}
|
||||
|
||||
if (UTF16.isSurrogate((char)0x61) || !UTF16.isSurrogate((char)0xd800)
|
||||
|| !UTF16.isSurrogate((char)0xdc00)) {
|
||||
errln("FAIL isSurrogate result failure");
|
||||
|| !UTF16.isSurrogate((char)0xdc00)
|
||||
|| UTF16.isSurrogate(0x61) || !UTF16.isSurrogate(0xd800)
|
||||
|| !UTF16.isSurrogate(0xdc00)) {
|
||||
errln("FAIL isSurrogate result failure");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -287,7 +287,7 @@ public final class TrieTest extends TestFmwk
|
|||
+ Integer.toHexString(value2) + " instead of 0x"
|
||||
+ Integer.toHexString(value));
|
||||
}
|
||||
if (!UTF16.isLeadSurrogate((char)start)) {
|
||||
if (!UTF16.isLeadSurrogate(start)) {
|
||||
value2 = trie.getLeadValue((char)start);
|
||||
if (value != value2) {
|
||||
errln("serialized trie.getLeadValue(U+"
|
||||
|
|
Loading…
Add table
Reference in a new issue