From 383247126fb6e280b1fa6986090a7cb86bcd8e37 Mon Sep 17 00:00:00 2001 From: Ram Viswanadha Date: Thu, 31 Aug 2006 21:45:16 +0000 Subject: [PATCH] ICU-5018 add fixes for JDK 5.0 X-SVN-Rev: 20217 --- icu4j/src/com/ibm/icu/impl/CharsetMBCS.java | 191 +++++++++++++----- icu4j/src/com/ibm/icu/impl/CharsetUTF16.java | 24 ++- .../src/com/ibm/icu/impl/CharsetUTF16LE.java | 22 +- icu4j/src/com/ibm/icu/impl/CharsetUTF32.java | 28 +-- .../src/com/ibm/icu/impl/CharsetUTF32LE.java | 28 +-- icu4j/src/com/ibm/icu/impl/CharsetUTF8.java | 27 +-- .../com/ibm/icu/impl/UConverterConstants.java | 21 ++ .../ibm/icu/impl/UConverterDataReader.java | 4 +- .../ibm/icu/impl/UConverterSharedData.java | 89 +------- 9 files changed, 231 insertions(+), 203 deletions(-) diff --git a/icu4j/src/com/ibm/icu/impl/CharsetMBCS.java b/icu4j/src/com/ibm/icu/impl/CharsetMBCS.java index 6060846f283..c712ee1b94d 100644 --- a/icu4j/src/com/ibm/icu/impl/CharsetMBCS.java +++ b/icu4j/src/com/ibm/icu/impl/CharsetMBCS.java @@ -16,7 +16,6 @@ import java.nio.BufferOverflowException; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.IntBuffer; -import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.nio.charset.CoderResult; @@ -24,17 +23,95 @@ import java.nio.charset.CoderResult; import com.ibm.icu.charset.CharsetDecoderICU; import com.ibm.icu.charset.CharsetEncoderICU; import com.ibm.icu.charset.CharsetICU; -import com.ibm.icu.impl.UConverterSharedData.UConverterMBCSTable; -import com.ibm.icu.impl.UConverterSharedData.MBCSHeader; -import com.ibm.icu.impl.UConverterSharedData.MBCSToUFallback; import com.ibm.icu.impl.UConverterSharedData.UConverterType; +import com.ibm.icu.lang.UCharacter; import com.ibm.icu.text.UTF16; public class CharsetMBCS extends CharsetICU { protected byte[] fromUSubstitution = null; protected UConverterSharedData sharedData = null; + public static final int MAX_VERSION_LENGTH=4; + /** + * Fallbacks to Unicode are stored outside the normal state table and code point structures + * in a vector of items of this type. They are sorted by offset. + */ + public final class MBCSToUFallback { + int offset; + int codePoint; + } + /** + * This is the MBCS part of the UConverterTable union (a runtime data structure). + * It keeps all the per-converter data and points into the loaded mapping tables. + */ + public static final class UConverterMBCSTable { + /* toUnicode */ + short countStates; + byte dbcsOnlyState; + boolean stateTableOwned; + int countToUFallbacks; + int stateTable[/*countStates*/][/*256*/]; + int swapLFNLStateTable[/*countStates*/][/*256*/]; /* for swaplfnl */ + char unicodeCodeUnits[/*countUnicodeResults*/]; + MBCSToUFallback toUFallbacks[/*countToUFallbacks*/]; + + /* fromUnicode */ + char fromUnicodeTable[]; + byte fromUnicodeBytes[]; + byte swapLFNLFromUnicodeBytes[]; /* for swaplfnl */ + int fromUBytesLength; + short outputType, unicodeMask; + + /* converter name for swaplfnl */ + String swapLFNLName; + + /* extension data */ + UConverterSharedData baseSharedData; + //int extIndexes[]; + ByteBuffer extIndexes; // create int[] view etc. as needed + + UConverterMBCSTable() + { + } + + UConverterMBCSTable(UConverterMBCSTable t) + { + countStates = t.countStates; + dbcsOnlyState = t.dbcsOnlyState; + stateTableOwned = t.stateTableOwned; + countToUFallbacks = t.countToUFallbacks; + stateTable = t.stateTable; + swapLFNLStateTable = t.swapLFNLStateTable; + unicodeCodeUnits = t.unicodeCodeUnits; + toUFallbacks = t.toUFallbacks; + fromUnicodeTable = t.fromUnicodeTable; + fromUnicodeBytes = t.fromUnicodeBytes; + swapLFNLFromUnicodeBytes = t.swapLFNLFromUnicodeBytes; + fromUBytesLength = t.fromUBytesLength; + outputType = t.outputType; + unicodeMask = t.unicodeMask; + swapLFNLName = t.swapLFNLName; + baseSharedData = t.baseSharedData; + extIndexes = t.extIndexes; + } + } + + /** + * MBCS data header. See data format description above. + */ + final class MBCSHeader { + byte version[/*U_MAX_VERSION_LENGTH*/]; + int countStates, countToUFallbacks, offsetToUCodeUnits, offsetFromUTable, offsetFromUBytes; + int flags; + int fromUBytesLength; + + public MBCSHeader() + { + version = new byte[MAX_VERSION_LENGTH]; + } + } + public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases) throws InvalidFormatException{ super(icuCanonicalName, javaCanonicalName, aliases); @@ -76,7 +153,7 @@ public class CharsetMBCS extends CharsetICU { UConverterDataReader reader = null; try { InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/" + args.name + "." + UConverterSharedData.DATA_TYPE); - BufferedInputStream b = new BufferedInputStream(i, UConverterSharedData.CNV_DATA_BUFFER_SIZE); + BufferedInputStream b = new BufferedInputStream(i, UConverterConstants.CNV_DATA_BUFFER_SIZE); reader = new UConverterDataReader(b); reader.readStaticData(staticData); } @@ -103,7 +180,7 @@ public class CharsetMBCS extends CharsetICU { // Load data UConverterMBCSTable mbcsTable = data.mbcs; - MBCSHeader header = data.new MBCSHeader(); + MBCSHeader header = new MBCSHeader(); try { reader.readMBCSHeader(header); } @@ -315,7 +392,7 @@ public class CharsetMBCS extends CharsetICU { stateTableArray = new int[header.countStates][256]; toUFallbacksArray = new MBCSToUFallback[header.countToUFallbacks]; for(int i = 0; i < toUFallbacksArray.length; ++i) - toUFallbacksArray[i] = data.new MBCSToUFallback(); + toUFallbacksArray[i] = new MBCSToUFallback(); unicodeCodeUnitsArray = new char[(header.offsetFromUTable - header.offsetToUCodeUnits)/2]; fromUnicodeTableArray = new char[(header.offsetFromUBytes - header.offsetFromUTable)/2]; fromUnicodeBytesArray = new byte[header.fromUBytesLength]; @@ -553,35 +630,35 @@ public class CharsetMBCS extends CharsetICU { //------------UConverterExt------------------------------------------------------- - protected static final int INDEXES_LENGTH = 0; /* 0 */ + protected static final int EXT_INDEXES_LENGTH = 0; /* 0 */ - protected static final int TO_U_INDEX = INDEXES_LENGTH + 1; /* 1 */ - protected static final int TO_U_LENGTH = TO_U_INDEX + 1; - protected static final int TO_U_UCHARS_INDEX = TO_U_LENGTH + 1; - protected static final int TO_U_UCHARS_LENGTH = TO_U_UCHARS_INDEX + 1; + protected static final int EXT_TO_U_INDEX = EXT_INDEXES_LENGTH + 1; /* 1 */ + protected static final int EXT_TO_U_LENGTH = EXT_TO_U_INDEX + 1; + protected static final int EXT_TO_U_UCHARS_INDEX = EXT_TO_U_LENGTH + 1; + protected static final int EXT_TO_U_UCHARS_LENGTH = EXT_TO_U_UCHARS_INDEX + 1; - protected static final int FROM_U_UCHARS_INDEX = TO_U_UCHARS_LENGTH + 1; /* 5 */ - protected static final int FROM_U_VALUES_INDEX = FROM_U_UCHARS_INDEX + 1; - protected static final int FROM_U_LENGTH = FROM_U_VALUES_INDEX + 1; - protected static final int FROM_U_BYTES_INDEX = FROM_U_LENGTH + 1; - protected static final int FROM_U_BYTES_LENGTH = FROM_U_BYTES_INDEX + 1; + protected static final int EXT_FROM_U_UCHARS_INDEX = EXT_TO_U_UCHARS_LENGTH + 1; /* 5 */ + protected static final int EXT_FROM_U_VALUES_INDEX = EXT_FROM_U_UCHARS_INDEX + 1; + protected static final int EXT_FROM_U_LENGTH = EXT_FROM_U_VALUES_INDEX + 1; + protected static final int EXT_FROM_U_BYTES_INDEX = EXT_FROM_U_LENGTH + 1; + protected static final int EXT_FROM_U_BYTES_LENGTH = EXT_FROM_U_BYTES_INDEX + 1; - protected static final int FROM_U_STAGE_12_INDEX = FROM_U_BYTES_LENGTH + 1; /* 10 */ - protected static final int FROM_U_STAGE_1_LENGTH = FROM_U_STAGE_12_INDEX + 1; - protected static final int FROM_U_STAGE_12_LENGTH = FROM_U_STAGE_1_LENGTH + 1; - protected static final int FROM_U_STAGE_3_INDEX = FROM_U_STAGE_12_LENGTH + 1; - protected static final int FROM_U_STAGE_3_LENGTH = FROM_U_STAGE_3_INDEX + 1; - protected static final int FROM_U_STAGE_3B_INDEX = FROM_U_STAGE_3_LENGTH + 1; - protected static final int FROM_U_STAGE_3B_LENGTH = FROM_U_STAGE_3B_INDEX + 1; + protected static final int EXT_FROM_U_STAGE_12_INDEX = EXT_FROM_U_BYTES_LENGTH + 1; /* 10 */ + protected static final int EXT_FROM_U_STAGE_1_LENGTH = EXT_FROM_U_STAGE_12_INDEX + 1; + protected static final int EXT_FROM_U_STAGE_12_LENGTH = EXT_FROM_U_STAGE_1_LENGTH + 1; + protected static final int EXT_FROM_U_STAGE_3_INDEX = EXT_FROM_U_STAGE_12_LENGTH + 1; + protected static final int EXT_FROM_U_STAGE_3_LENGTH = EXT_FROM_U_STAGE_3_INDEX + 1; + protected static final int EXT_FROM_U_STAGE_3B_INDEX = EXT_FROM_U_STAGE_3_LENGTH + 1; + protected static final int EXT_FROM_U_STAGE_3B_LENGTH = EXT_FROM_U_STAGE_3B_INDEX + 1; - protected static final int COUNT_BYTES = FROM_U_STAGE_3B_LENGTH + 1; /* 17 */ - protected static final int COUNT_UCHARS = COUNT_BYTES + 1; - protected static final int FLAGS = COUNT_UCHARS + 1; + protected static final int EXT_COUNT_BYTES = EXT_FROM_U_STAGE_3B_LENGTH + 1; /* 17 */ + protected static final int EXT_COUNT_UCHARS = EXT_COUNT_BYTES + 1; + protected static final int EXT_FLAGS = EXT_COUNT_UCHARS + 1; - protected static final int RESERVED_INDEX = FLAGS + 1; /* 20, moves with additional indexes */ + protected static final int EXT_RESERVED_INDEX = EXT_FLAGS + 1; /* 20, moves with additional indexes */ - protected static final int SIZE=31; - protected static final int INDEXES_MIN_LENGTH=32; + protected static final int EXT_SIZE=31; + protected static final int EXT_INDEXES_MIN_LENGTH=32; /* toUnicode helpers -------------------------------------------------------- */ @@ -718,11 +795,11 @@ public class CharsetMBCS extends CharsetICU { IntBuffer a = indexes.asIntBuffer(); int n; if(a.hasArray()) - n = a.array()[COUNT_BYTES]; + n = a.array()[EXT_COUNT_BYTES]; else - n = a.get(COUNT_BYTES); + n = a.get(EXT_COUNT_BYTES); - return indexes.getInt(4*COUNT_BYTES)&0xff; + return indexes.getInt(4*n)&0xff; } /* @@ -1372,12 +1449,12 @@ public class CharsetMBCS extends CharsetICU { int i, j, index, length, matchLength; short b; - if(cx==null || cx.asIntBuffer().get(TO_U_LENGTH)<=0) { + if(cx==null || cx.asIntBuffer().get(EXT_TO_U_LENGTH)<=0) { return 0; /* no extension data, no match */ } /* initialize */ - toUTable = (IntBuffer)ARRAY(cx, TO_U_INDEX, int.class); + toUTable = (IntBuffer)ARRAY(cx, EXT_TO_U_INDEX, int.class); index = 0; matchValue = 0; @@ -1489,7 +1566,7 @@ public class CharsetMBCS extends CharsetICU { /* output a string - with correct data we have resultLength>0 */ char[] a = new char[TO_U_GET_LENGTH(value)]; - CharBuffer cb = ((CharBuffer)ARRAY(cx, TO_U_UCHARS_INDEX, char.class)); + CharBuffer cb = ((CharBuffer)ARRAY(cx, EXT_TO_U_UCHARS_INDEX, char.class)); cb.position(TO_U_GET_INDEX(value)); cb.get(a, 0, a.length); return toUWriteUChars(this, a, 0, a.length, target, offsets, srcIndex); @@ -2575,15 +2652,15 @@ public class CharsetMBCS extends CharsetICU { /* trie lookup of firstCP */ index=firstCP>>>10; /* stage 1 index */ - if(index>=cx.asIntBuffer().get(FROM_U_STAGE_1_LENGTH)) { + if(index>=cx.asIntBuffer().get(EXT_FROM_U_STAGE_1_LENGTH)) { return 0; /* the first code point is outside the trie */ } - stage12 = (CharBuffer)ARRAY(cx, FROM_U_STAGE_12_INDEX, char.class); - stage3 = (CharBuffer)ARRAY(cx, FROM_U_STAGE_3_INDEX, char.class); + stage12 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_12_INDEX, char.class); + stage3 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3_INDEX, char.class); index = FROM_U(stage12, stage3, index, firstCP); - stage3b = (IntBuffer)ARRAY(cx, FROM_U_STAGE_3B_INDEX, int.class); + stage3b = (IntBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3B_INDEX, int.class); value = stage3b.get(stage3b.position() + index); if(value==0) { return 0; @@ -2594,8 +2671,8 @@ public class CharsetMBCS extends CharsetICU { index = FROM_U_GET_PARTIAL_INDEX(value); /* initialize */ - fromUTableUChars = (CharBuffer)ARRAY(cx, FROM_U_UCHARS_INDEX, char.class); - fromUTableValues = (IntBuffer)ARRAY(cx, FROM_U_VALUES_INDEX, int.class); + fromUTableUChars = (CharBuffer)ARRAY(cx, EXT_FROM_U_UCHARS_INDEX, char.class); + fromUTableValues = (IntBuffer)ARRAY(cx, EXT_FROM_U_VALUES_INDEX, int.class); matchValue=0; i=j=matchLength=0; @@ -2738,7 +2815,7 @@ public class CharsetMBCS extends CharsetICU { else { byte[] slice = new byte[length]; - ByteBuffer bb = ((ByteBuffer)ARRAY(cx, FROM_U_BYTES_INDEX, byte.class)); + ByteBuffer bb = ((ByteBuffer)ARRAY(cx, EXT_FROM_U_BYTES_INDEX, byte.class)); bb.position(value); bb.get(slice, 0, slice.length); @@ -3400,7 +3477,7 @@ public class CharsetMBCS extends CharsetICU { char trail=source.get(x.sourceArrayIndex); if(UTF16.isTrailSurrogate(trail)) { ++x.sourceArrayIndex; - x.c = UTF16.getCodePoint((char)x.c, trail); + x.c = UCharacter.getCodePoint((char)x.c, trail); /* this codepage does not map supplementary code points */ /* callback(unassigned) */ } else { @@ -3439,7 +3516,7 @@ public class CharsetMBCS extends CharsetICU { if(UTF16.isTrailSurrogate(trail)) { ++x.sourceArrayIndex; ++x.nextSourceIndex; - x.c = UTF16.getCodePoint((char)x.c, trail); + x.c = UCharacter.getCodePoint((char)x.c, trail); if((unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) { /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ fromUnicodeStatus = x.prevLength; /* save the old state */ @@ -3511,7 +3588,7 @@ public class CharsetMBCS extends CharsetICU { if(UTF16.isTrailSurrogate(trail)) { ++x.sourceArrayIndex; ++x.nextSourceIndex; - x.c = UTF16.getCodePoint((char)x.c, trail); + x.c = UCharacter.getCodePoint((char)x.c, trail); if((unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) { /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ /* callback(unassigned) */ @@ -3565,14 +3642,16 @@ public class CharsetMBCS extends CharsetICU { public CharsetEncoder newEncoder() { return new CharsetEncoderMBCS(this); } - /* (non-Javadoc) - * @see java.lang.Comparable#compareTo(java.lang.Object) - */ - public int compareTo(Object o) { - if(o instanceof Charset){ - return super.compareTo((Charset)o); - } - return -1; - } - +//#ifdef VERSION_1.5 +// /** +// * Implements compareTo method of Comparable interface +// * @see java.lang.Comparable#compareTo(java.lang.Object) +// */ +// public int compareTo(Object o) { +// if(o instanceof Charset){ +// return super.compareTo((Charset)o); +// } +// return -1; +// } +//#endif } diff --git a/icu4j/src/com/ibm/icu/impl/CharsetUTF16.java b/icu4j/src/com/ibm/icu/impl/CharsetUTF16.java index b0c9633c9a8..6430b154773 100644 --- a/icu4j/src/com/ibm/icu/impl/CharsetUTF16.java +++ b/icu4j/src/com/ibm/icu/impl/CharsetUTF16.java @@ -12,7 +12,6 @@ import java.nio.BufferOverflowException; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.IntBuffer; -import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.nio.charset.CoderResult; @@ -103,7 +102,7 @@ public class CharsetUTF16 extends CharsetICU { } else /* targetCapacity==1 */ { charErrorBufferArray[charErrorBufferBegin+0]=trail; charErrorBufferLength=1; - throw new BufferOverflowException(); + return CoderResult.OVERFLOW; } count=0; c=0; @@ -444,13 +443,16 @@ public class CharsetUTF16 extends CharsetICU { public CharsetEncoder newEncoder() { return new CharsetEncoderUTF16(this); } - /* (non-Javadoc) - * @see java.lang.Comparable#compareTo(java.lang.Object) - */ - public int compareTo(Object o) { - if(o instanceof Charset){ - return super.compareTo((Charset)o); - } - return -1; - } +//#ifdef VERSION_1.5 +// /** +// * Implements compareTo method of Comparable interface +// * @see java.lang.Comparable#compareTo(java.lang.Object) +// */ +// public int compareTo(Object o) { +// if(o instanceof Charset){ +// return super.compareTo((Charset)o); +// } +// return -1; +// } +//#endif } diff --git a/icu4j/src/com/ibm/icu/impl/CharsetUTF16LE.java b/icu4j/src/com/ibm/icu/impl/CharsetUTF16LE.java index 5566d305688..b25af01bcb5 100644 --- a/icu4j/src/com/ibm/icu/impl/CharsetUTF16LE.java +++ b/icu4j/src/com/ibm/icu/impl/CharsetUTF16LE.java @@ -12,7 +12,6 @@ import java.nio.BufferOverflowException; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.IntBuffer; -import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.nio.charset.CoderResult; @@ -447,13 +446,16 @@ public class CharsetUTF16LE extends CharsetICU { public CharsetEncoder newEncoder() { return new CharsetEncoderUTF16LE(this); } - /* (non-Javadoc) - * @see java.lang.Comparable#compareTo(java.lang.Object) - */ - public int compareTo(Object o) { - if(o instanceof Charset){ - return super.compareTo((Charset)o); - } - return -1; - } +//#ifdef VERSION_1.5 +// /** +// * Implements compareTo method of Comparable interface +// * @see java.lang.Comparable#compareTo(java.lang.Object) +// */ +// public int compareTo(Object o) { +// if(o instanceof Charset){ +// return super.compareTo((Charset)o); +// } +// return -1; +// } +//#endif } diff --git a/icu4j/src/com/ibm/icu/impl/CharsetUTF32.java b/icu4j/src/com/ibm/icu/impl/CharsetUTF32.java index 00e1b7bbecf..d4cc63f4dc5 100644 --- a/icu4j/src/com/ibm/icu/impl/CharsetUTF32.java +++ b/icu4j/src/com/ibm/icu/impl/CharsetUTF32.java @@ -12,7 +12,6 @@ import java.nio.BufferOverflowException; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.IntBuffer; -import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.nio.charset.CoderResult; @@ -68,9 +67,9 @@ public class CharsetUTF32 extends CharsetICU { } } - if (ch <= UConverterSharedData.MAXIMUM_UTF && !UTF16.isSurrogate((char)ch)) { + if (ch <= UConverterConstants.MAXIMUM_UTF && !UTF16.isSurrogate((char)ch)) { /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ - if (ch <= UConverterSharedData.MAXIMUM_UCS2) + if (ch <= UConverterConstants.MAXIMUM_UCS2) { /* fits in 16 bits */ target.put((char)ch); @@ -204,7 +203,7 @@ public class CharsetUTF32 extends CharsetICU { if (sourceArrayIndex < source.limit()) { ch2 = source.get(sourceArrayIndex); if (UTF16.isTrailSurrogate((char)ch2)) { - ch = ((ch - UConverterSharedData.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE; + ch = ((ch - UConverterConstants.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE; sourceArrayIndex++; } else { @@ -316,13 +315,16 @@ public class CharsetUTF32 extends CharsetICU { public CharsetEncoder newEncoder() { return new CharsetEncoderUTF32(this); } - /* (non-Javadoc) - * @see java.lang.Comparable#compareTo(java.lang.Object) - */ - public int compareTo(Object o) { - if(o instanceof Charset){ - return super.compareTo((Charset)o); - } - return -1; - } +//#ifdef VERSION_1.5 +// /** +// * Implements compareTo method of Comparable interface +// * @see java.lang.Comparable#compareTo(java.lang.Object) +// */ +// public int compareTo(Object o) { +// if(o instanceof Charset){ +// return super.compareTo((Charset)o); +// } +// return -1; +// } +//#endif } diff --git a/icu4j/src/com/ibm/icu/impl/CharsetUTF32LE.java b/icu4j/src/com/ibm/icu/impl/CharsetUTF32LE.java index 688c7a581cd..a62686a96d0 100644 --- a/icu4j/src/com/ibm/icu/impl/CharsetUTF32LE.java +++ b/icu4j/src/com/ibm/icu/impl/CharsetUTF32LE.java @@ -12,7 +12,6 @@ import java.nio.BufferOverflowException; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.IntBuffer; -import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.nio.charset.CoderResult; @@ -68,9 +67,9 @@ public class CharsetUTF32LE extends CharsetICU { } } - if (ch <= UConverterSharedData.MAXIMUM_UTF && !UTF16.isSurrogate((char)ch)) { + if (ch <= UConverterConstants.MAXIMUM_UTF && !UTF16.isSurrogate((char)ch)) { /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ - if (ch <= UConverterSharedData.MAXIMUM_UCS2) + if (ch <= UConverterConstants.MAXIMUM_UCS2) { /* fits in 16 bits */ target.put((char)ch); @@ -204,7 +203,7 @@ public class CharsetUTF32LE extends CharsetICU { if (sourceArrayIndex < source.limit()) { ch2 = source.get(sourceArrayIndex); if (UTF16.isTrailSurrogate((char)ch2)) { - ch = ((ch - UConverterSharedData.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE; + ch = ((ch - UConverterConstants.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE; sourceArrayIndex++; } else { @@ -316,13 +315,16 @@ public class CharsetUTF32LE extends CharsetICU { public CharsetEncoder newEncoder() { return new CharsetEncoderUTF32LE(this); } - /* (non-Javadoc) - * @see java.lang.Comparable#compareTo(java.lang.Object) - */ - public int compareTo(Object o) { - if(o instanceof Charset){ - return super.compareTo((Charset)o); - } - return -1; - } +//#ifdef VERSION_1.5 +// /** +// * Implements compareTo method of Comparable interface +// * @see java.lang.Comparable#compareTo(java.lang.Object) +// */ +// public int compareTo(Object o) { +// if(o instanceof Charset){ +// return super.compareTo((Charset)o); +// } +// return -1; +// } +//#endif } diff --git a/icu4j/src/com/ibm/icu/impl/CharsetUTF8.java b/icu4j/src/com/ibm/icu/impl/CharsetUTF8.java index e757e48b854..4e77c987afa 100644 --- a/icu4j/src/com/ibm/icu/impl/CharsetUTF8.java +++ b/icu4j/src/com/ibm/icu/impl/CharsetUTF8.java @@ -13,7 +13,6 @@ import java.nio.BufferOverflowException; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.IntBuffer; -import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.nio.charset.CoderResult; @@ -21,6 +20,7 @@ import java.nio.charset.CoderResult; import com.ibm.icu.charset.CharsetDecoderICU; import com.ibm.icu.charset.CharsetEncoderICU; import com.ibm.icu.charset.CharsetICU; +import com.ibm.icu.lang.UCharacter; import com.ibm.icu.text.UTF16; /** * @author Niti Hantaweepant @@ -303,7 +303,7 @@ public class CharsetUTF8 extends CharsetICU { char trail = source.get(sourceArrayIndex); if(UTF16.isTrailSurrogate(trail)) { ++sourceArrayIndex; - ch = UTF16.getCodePoint((char)ch, trail); + ch = UCharacter.getCodePoint((char)ch, trail); /* convert this supplementary code point */ /* exit this condition tree */ } else { @@ -381,7 +381,7 @@ public class CharsetUTF8 extends CharsetICU { char trail = source.get(sourceArrayIndex); if(UTF16.isTrailSurrogate(trail)) { ++sourceArrayIndex; - ch = UTF16.getCodePoint((char)ch, trail); + ch = UCharacter.getCodePoint((char)ch, trail); //ch2 = 0; /* convert this supplementary code point */ /* exit this condition tree */ @@ -506,13 +506,16 @@ public class CharsetUTF8 extends CharsetICU { public CharsetEncoder newEncoder() { return new CharsetEncoderUTF8(this); } - /* (non-Javadoc) - * @see java.lang.Comparable#compareTo(java.lang.Object) - */ - public int compareTo(Object o) { - if(o instanceof Charset){ - return super.compareTo((Charset)o); - } - return -1; - } +//#ifdef VERSION_1.5 +// /** +// * Implements compareTo method of Comparable interface +// * @see java.lang.Comparable#compareTo(java.lang.Object) +// */ +// public int compareTo(Object o) { +// if(o instanceof Charset){ +// return super.compareTo((Charset)o); +// } +// return -1; +// } +//#endif } diff --git a/icu4j/src/com/ibm/icu/impl/UConverterConstants.java b/icu4j/src/com/ibm/icu/impl/UConverterConstants.java index a4bcdff5857..255ada4016e 100644 --- a/icu4j/src/com/ibm/icu/impl/UConverterConstants.java +++ b/icu4j/src/com/ibm/icu/impl/UConverterConstants.java @@ -8,6 +8,8 @@ */ package com.ibm.icu.impl; +import com.ibm.icu.impl.UConverterSharedData.cnvNameTypeClass; + public interface UConverterConstants { public static final short UNSIGNED_BYTE_MASK = 0xff; @@ -153,4 +155,23 @@ public interface UConverterConstants { */ } //end err.h + + + static final String DATA_TYPE = "cnv"; + static final int CNV_DATA_BUFFER_SIZE = 25000; + static final int SIZE_OF_UCONVERTER_SHARED_DATA = 100; + + static final int MAXIMUM_UCS2 = 0x0000FFFF; + static final int MAXIMUM_UTF = 0x0010FFFF; + static final int MAXIMUM_UCS4 = 0x7FFFFFFF; + static final int HALF_SHIFT = 10; + static final int HALF_BASE = 0x0010000; + static final int HALF_MASK = 0x3FF; + static final int SURROGATE_HIGH_START = 0xD800; + static final int SURROGATE_HIGH_END = 0xDBFF; + static final int SURROGATE_LOW_START = 0xDC00; + static final int SURROGATE_LOW_END = 0xDFFF; + + /* -SURROGATE_LOW_START + HALF_BASE */ + static final int SURROGATE_LOW_BASE = 9216; } diff --git a/icu4j/src/com/ibm/icu/impl/UConverterDataReader.java b/icu4j/src/com/ibm/icu/impl/UConverterDataReader.java index 48ab399cf6e..ca6009a3746 100644 --- a/icu4j/src/com/ibm/icu/impl/UConverterDataReader.java +++ b/icu4j/src/com/ibm/icu/impl/UConverterDataReader.java @@ -451,7 +451,7 @@ public final class UConverterDataReader implements ICUBinary.Authenticate { dataInputStream.read(sd.reserved); } - protected void readMBCSHeader(UConverterSharedData.MBCSHeader h) throws IOException + protected void readMBCSHeader(CharsetMBCS.MBCSHeader h) throws IOException { dataInputStream.read(h.version); h.countStates = dataInputStream.readInt(); @@ -463,7 +463,7 @@ public final class UConverterDataReader implements ICUBinary.Authenticate { h.fromUBytesLength = dataInputStream.readInt(); } - protected void readMBCSTable(int[][] stateTableArray, UConverterSharedData.MBCSToUFallback[] toUFallbacksArray, char[] unicodeCodeUnitsArray, char[] fromUnicodeTableArray, byte[] fromUnicodeBytesArray) throws IOException + protected void readMBCSTable(int[][] stateTableArray, CharsetMBCS.MBCSToUFallback[] toUFallbacksArray, char[] unicodeCodeUnitsArray, char[] fromUnicodeTableArray, byte[] fromUnicodeBytesArray) throws IOException { int i, j; for(i = 0; i < stateTableArray.length; ++i) diff --git a/icu4j/src/com/ibm/icu/impl/UConverterSharedData.java b/icu4j/src/com/ibm/icu/impl/UConverterSharedData.java index f1cb9f76874..8838b640efd 100644 --- a/icu4j/src/com/ibm/icu/impl/UConverterSharedData.java +++ b/icu4j/src/com/ibm/icu/impl/UConverterSharedData.java @@ -20,7 +20,7 @@ public class UConverterSharedData { public int structSize; /* Size of this structure */ //uint32_t referenceCounter; /* used to count number of clients, 0xffffffff for static SharedData */ public int referenceCounter; /* used to count number of clients, 0xffffffff for static SharedData */ - public static final int MAX_VERSION_LENGTH=4; + //agljport:todo const void *dataMemory; /* from udata_openChoice() - for cleanup */ //agljport:todo void *table; /* Unused. This used to be a UConverterTable - Pointer to conversion data - see mbcs below */ @@ -50,13 +50,12 @@ public class UConverterSharedData { * The table field above also remains to avoid updating all static * definitions, but is now unused. * - * markus 2003-nov-07 */ - public UConverterMBCSTable mbcs; + public CharsetMBCS.UConverterMBCSTable mbcs; public UConverterSharedData() { - mbcs = new UConverterMBCSTable(); + mbcs = new CharsetMBCS.UConverterMBCSTable(); } public UConverterSharedData(int structSize_, int referenceCounter_, UConverterStaticData staticData_, boolean sharedDataCached_,/* UConverterImpl impl_,*/ long toUnicodeStatus_) @@ -373,88 +372,6 @@ public class UConverterSharedData { return null; } - - /** - * Fallbacks to Unicode are stored outside the normal state table and code point structures - * in a vector of items of this type. They are sorted by offset. - */ - public final class MBCSToUFallback { - int offset; - int codePoint; - } - - /** - * This is the MBCS part of the UConverterTable union (a runtime data structure). - * It keeps all the per-converter data and points into the loaded mapping tables. - */ - public final class UConverterMBCSTable { - /* toUnicode */ - short countStates; - byte dbcsOnlyState; - boolean stateTableOwned; - int countToUFallbacks; - - int stateTable[/*countStates*/][/*256*/]; - int swapLFNLStateTable[/*countStates*/][/*256*/]; /* for swaplfnl */ - char unicodeCodeUnits[/*countUnicodeResults*/]; - MBCSToUFallback toUFallbacks[/*countToUFallbacks*/]; - - /* fromUnicode */ - char fromUnicodeTable[]; - byte fromUnicodeBytes[]; - byte swapLFNLFromUnicodeBytes[]; /* for swaplfnl */ - int fromUBytesLength; - short outputType, unicodeMask; - - /* converter name for swaplfnl */ - String swapLFNLName; - - /* extension data */ - UConverterSharedData baseSharedData; - //int extIndexes[]; - ByteBuffer extIndexes; // create int[] view etc. as needed - - UConverterMBCSTable() - { - } - - UConverterMBCSTable(UConverterMBCSTable t) - { - countStates = t.countStates; - dbcsOnlyState = t.dbcsOnlyState; - stateTableOwned = t.stateTableOwned; - countToUFallbacks = t.countToUFallbacks; - stateTable = t.stateTable; - swapLFNLStateTable = t.swapLFNLStateTable; - unicodeCodeUnits = t.unicodeCodeUnits; - toUFallbacks = t.toUFallbacks; - fromUnicodeTable = t.fromUnicodeTable; - fromUnicodeBytes = t.fromUnicodeBytes; - swapLFNLFromUnicodeBytes = t.swapLFNLFromUnicodeBytes; - fromUBytesLength = t.fromUBytesLength; - outputType = t.outputType; - unicodeMask = t.unicodeMask; - swapLFNLName = t.swapLFNLName; - baseSharedData = t.baseSharedData; - extIndexes = t.extIndexes; - } - } - - /** - * MBCS data header. See data format description above. - */ - public final class MBCSHeader { - byte version[/*U_MAX_VERSION_LENGTH*/]; - int countStates, countToUFallbacks, offsetToUCodeUnits, offsetFromUTable, offsetFromUBytes; - int flags; - int fromUBytesLength; - - public MBCSHeader() - { - version = new byte[MAX_VERSION_LENGTH]; - } - } - /** * Enum for specifying basic types of converters * @see getType