diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetCallback.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetCallback.java
index a76fec0d6e9..c013e9e3a1b 100644
--- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetCallback.java
+++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetCallback.java
@@ -79,6 +79,42 @@ public class CharsetCallback {
      */
     private static final String ESCAPE_CSS2 = "S";
 
+    /*
+     * Tests whether c is in the hard-coded list of default ignorable code points.
+     * The encoder callbacks in this class return CoderResult.UNDERFLOW for such
+     * code points when they are unmappable, instead of reporting or substituting.
+     * This list must be kept in sync with the one in ucnv_err.c.
+     */
+    private static boolean IS_DEFAULT_IGNORABLE_CODE_POINT(int c) {
+        return ((c == 0x00AD) ||
+                (c == 0x034F) ||
+                (c == 0x061C) ||
+                (c == 0x115F) ||
+                (c == 0x1160) ||
+                (0x17B4 <= c && c <= 0x17B5) ||
+                (0x180B <= c && c <= 0x180E) ||
+                (0x200B <= c && c <= 0x200F) ||
+                (0x202A <= c && c <= 0x202E) ||
+                (c == 0x2060) ||
+                (0x2066 <= c && c <= 0x2069) ||
+                (0x2061 <= c && c <= 0x2064) ||
+                (0x206A <= c && c <= 0x206F) ||
+                (c == 0x3164) ||
+                (0x0FE00 <= c && c <= 0x0FE0F) ||
+                (c == 0x0FEFF) ||
+                (c == 0x0FFA0) ||
+                (0x01BCA0 <= c && c <= 0x01BCA3) ||
+                (0x01D173 <= c && c <= 0x01D17A) ||
+                (c == 0x0E0001) ||
+                (0x0E0020 <= c && c <= 0x0E007F) ||
+                (0x0E0100 <= c && c <= 0x0E01EF) ||
+                (c == 0x2065) ||
+                (0x0FFF0 <= c && c <= 0x0FFF8) ||
+                (c == 0x0E0000) ||
+                (0x0E0002 <= c && c <= 0x0E001F) ||
+                (0x0E0080 <= c && c <= 0x0E00FF) ||
+                (0x0E01F0 <= c && c <= 0x0E0FFF)
+                );
+    }
     /**
      * Decoder Callback interface
      * @stable ICU 3.6
@@ -160,7 +196,9 @@ public class CharsetCallback {
         public CoderResult call(CharsetEncoderICU encoder, Object context,
                 CharBuffer source, ByteBuffer target, IntBuffer offsets,
                 char[] buffer, int length, int cp, CoderResult cr){
-            if(context==null){
+            if (cr.isUnmappable() && IS_DEFAULT_IGNORABLE_CODE_POINT(cp)) {
+                return CoderResult.UNDERFLOW;
+            }else if(context==null){
                 return encoder.cbFromUWriteSub(encoder, source, target, offsets);
             }else if(((String)context).equals(SUB_STOP_ON_ILLEGAL)){
                 if(!cr.isUnmappable()){
@@ -207,6 +245,9 @@ public class CharsetCallback {
         public CoderResult call(CharsetEncoderICU encoder, Object context,
                 CharBuffer source, ByteBuffer target, IntBuffer offsets,
                 char[] buffer, int length, int cp, CoderResult cr){
+            if (cr.isUnmappable() && IS_DEFAULT_IGNORABLE_CODE_POINT(cp)) {
+                return CoderResult.UNDERFLOW;
+            }
             return cr;
         }
     };
@@ -247,6 +288,10 @@ public class CharsetCallback {
             int valueStringLength = 0;
             int i = 0;
 
+            if (cr.isUnmappable() && IS_DEFAULT_IGNORABLE_CODE_POINT(cp)) {
+                return CoderResult.UNDERFLOW;
+            }
+
             if (context == null || !(context instanceof String)) {
                 while (i < length) {
                     valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
diff --git a/icu4j/main/tests/charset/src/com/ibm/icu/dev/test/charset/TestCharset.java b/icu4j/main/tests/charset/src/com/ibm/icu/dev/test/charset/TestCharset.java
index 995dc32e517..22fff130f05 100644
--- a/icu4j/main/tests/charset/src/com/ibm/icu/dev/test/charset/TestCharset.java
+++ b/icu4j/main/tests/charset/src/com/ibm/icu/dev/test/charset/TestCharset.java
@@ -1,6 +1,6 @@
 /**
 *******************************************************************************
-* Copyright (C) 2006-2012, International Business Machines Corporation and    *
+* Copyright (C) 2006-2014, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
@@ -33,6 +33,7 @@ import com.ibm.icu.charset.CharsetICU;
 import com.ibm.icu.charset.CharsetProviderICU;
 import com.ibm.icu.dev.test.TestFmwk;
 import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
 
 public class TestCharset extends TestFmwk {
     private String m_encoding = "UTF-16";
@@ -5735,4 +5736,26 @@ public class TestCharset extends TestFmwk {
         }
 
     }
+
+    public void TestDefaultIgnorableCallback() {
+        String name = "euc-jp-2007";
+        String pattern = "[:Default_Ignorable_Code_Point:]";
+        UnicodeSet set = new UnicodeSet(pattern);
+        CharsetEncoder encoder = CharsetICU.forNameICU(name).newEncoder();
+
+        // REPORT makes encode() throw unless the callback ignores the code point
+        encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+
+        int size = set.size();
+        for (int i = 0; i < size; i++) {
+            CharBuffer input = CharBuffer.wrap(Character.toChars(set.charAt(i)));
+            encoder.reset();
+            try {
+                encoder.encode(input);
+            } catch (Exception ex) {
+                errln("Callback should have ignore default ignorable: 0x" + Integer.toHexString(set.charAt(i)));
+            }
+
+        }
+    }
 }