ICU-10551 Make ICU4J encoder callbacks ignore default ignorable code points

X-SVN-Rev: 36200
This commit is contained in:
Michael Ow 2014-08-18 21:28:18 +00:00
parent 9a4ae3b440
commit 1c4e81baed
2 changed files with 68 additions and 2 deletions

View file

@ -79,6 +79,40 @@ public class CharsetCallback {
*/
private static final String ESCAPE_CSS2 = "S";
/*
 * Keep this list in sync with the one in ucnv_err.c.
 *
 * Entries that are adjacent in that list are coalesced into single ranges here:
 *   0x115F, 0x1160                                   -> 0x115F..0x1160
 *   0x2060, 0x2061..0x2064, 0x2065,
 *   0x2066..0x2069, 0x206A..0x206F                   -> 0x2060..0x206F
 *   0xE0000, 0xE0001, 0xE0002..0xE001F,
 *   0xE0020..0xE007F, 0xE0080..0xE00FF,
 *   0xE0100..0xE01EF, 0xE01F0..0xE0FFF               -> 0xE0000..0xE0FFF
 *
 * Returns true when c is one of the listed code points, false otherwise.
 */
private static boolean IS_DEFAULT_IGNORABLE_CODE_POINT(int c) {
    // Isolated code points that are not adjacent to any listed range.
    switch (c) {
    case 0x00AD:
    case 0x034F:
    case 0x061C:
    case 0x3164:
    case 0xFEFF:
    case 0xFFA0:
        return true;
    default:
        break;
    }

    // Ranges inside the Basic Multilingual Plane.
    if (c <= 0xFFFF) {
        return (0x115F <= c && c <= 0x1160)
            || (0x17B4 <= c && c <= 0x17B5)
            || (0x180B <= c && c <= 0x180E)
            || (0x200B <= c && c <= 0x200F)
            || (0x202A <= c && c <= 0x202E)
            || (0x2060 <= c && c <= 0x206F)
            || (0xFE00 <= c && c <= 0xFE0F)
            || (0xFFF0 <= c && c <= 0xFFF8);
    }

    // Ranges above U+FFFF.
    return (0x1BCA0 <= c && c <= 0x1BCA3)
        || (0x1D173 <= c && c <= 0x1D17A)
        || (0xE0000 <= c && c <= 0xE0FFF);
}
/**
* Decoder Callback interface
* @stable ICU 3.6
@ -160,7 +194,9 @@ public class CharsetCallback {
public CoderResult call(CharsetEncoderICU encoder, Object context,
CharBuffer source, ByteBuffer target, IntBuffer offsets,
char[] buffer, int length, int cp, CoderResult cr){
if(context==null){
if (cr.isUnmappable() && IS_DEFAULT_IGNORABLE_CODE_POINT(cp)) {
return CoderResult.UNDERFLOW;
}else if(context==null){
return encoder.cbFromUWriteSub(encoder, source, target, offsets);
}else if(((String)context).equals(SUB_STOP_ON_ILLEGAL)){
if(!cr.isUnmappable()){
@ -207,6 +243,9 @@ public class CharsetCallback {
/*
 * Returns CoderResult.UNDERFLOW when cr reports an unmappable character and
 * cp is in the default-ignorable list; in every other case cr is handed
 * back unchanged.
 */
public CoderResult call(CharsetEncoderICU encoder, Object context,
        CharBuffer source, ByteBuffer target, IntBuffer offsets,
        char[] buffer, int length, int cp, CoderResult cr){
    final boolean skipIgnorable = cr.isUnmappable() && IS_DEFAULT_IGNORABLE_CODE_POINT(cp);
    return skipIgnorable ? CoderResult.UNDERFLOW : cr;
}
};
@ -247,6 +286,10 @@ public class CharsetCallback {
int valueStringLength = 0;
int i = 0;
if (cr.isUnmappable() && IS_DEFAULT_IGNORABLE_CODE_POINT(cp)) {
return CoderResult.UNDERFLOW;
}
if (context == null || !(context instanceof String)) {
while (i < length) {
valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */

View file

@ -1,6 +1,6 @@
/**
*******************************************************************************
* Copyright (C) 2006-2012, International Business Machines Corporation and *
* Copyright (C) 2006-2014, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
@ -33,6 +33,7 @@ import com.ibm.icu.charset.CharsetICU;
import com.ibm.icu.charset.CharsetProviderICU;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
public class TestCharset extends TestFmwk {
private String m_encoding = "UTF-16";
@ -5735,4 +5736,26 @@ public class TestCharset extends TestFmwk {
}
}
/**
 * Encodes every code point of [:Default_Ignorable_Code_Point:], one at a
 * time, with the "euc-jp-2007" encoder while unmappable characters are set
 * to be reported, and logs an error for each code point whose encoding
 * throws.
 */
public void TestDefaultIgnorableCallback() {
    String name = "euc-jp-2007";
    String pattern = "[:Default_Ignorable_Code_Point:]";
    UnicodeSet set = new UnicodeSet(pattern);
    CharsetEncoder encoder = CharsetICU.forNameICU(name).newEncoder();

    // set callback for the converter
    encoder.onUnmappableCharacter(CodingErrorAction.REPORT);

    int size = set.size();
    for (int i = 0; i < size; i++) {
        // Look the code point up once; it is needed for both the input and the message.
        int cp = set.charAt(i);
        CharBuffer input = CharBuffer.wrap(Character.toChars(cp));

        encoder.reset();
        try {
            // Reuse the buffer built above instead of wrapping a second identical one.
            encoder.encode(input);
        } catch (Exception ex) {
            errln("Callback should have ignore default ignorable: 0x" + Integer.toHexString(cp));
        }
    }
}
}