diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetCallback.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetCallback.java index 7363fba08fe..a76fec0d6e9 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetCallback.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetCallback.java @@ -1,11 +1,9 @@ /** ******************************************************************************* -* Copyright (C) 2006-2011, International Business Machines Corporation and * -* others. All Rights Reserved. * +* Copyright (C) 2006-2014, International Business Machines Corporation and +* others. All Rights Reserved. ******************************************************************************* -* -******************************************************************************* -*/ +*/ package com.ibm.icu.charset; @@ -253,14 +251,14 @@ public class CharsetCallback { while (i < length) { valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */ - valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4); + valueStringLength += itou(valueString, valueStringLength, buffer[i++], 16, 4); } } else { if (((String)context).equals(ESCAPE_JAVA)) { while (i < length) { valueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */ valueString[valueStringLength++] = UNICODE_U_LOW_CODEPOINT; /* adding u */ - valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4); + valueStringLength += itou(valueString, valueStringLength, buffer[i++], 16, 4); } } else if (((String)context).equals(ESCAPE_C)) { valueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */ @@ -270,7 +268,7 @@ public class CharsetCallback { valueStringLength = itou(valueString, valueStringLength, cp, 16, 8); } else { valueString[valueStringLength++] = UNICODE_U_LOW_CODEPOINT; /* adding u */ - valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4); + valueStringLength += itou(valueString, valueStringLength, buffer[0], 16, 4); } } else if (((String)context).equals(ESCAPE_XML_DEC)) { valueString[valueStringLength++] = UNICODE_AMP_CODEPOINT; /* adding & */ @@ -278,7 +276,7 @@ public class CharsetCallback { if (length == 2) { valueStringLength += itou(valueString, valueStringLength, cp, 10, 0); } else { - valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 10, 0); + valueStringLength += itou(valueString, valueStringLength, buffer[0], 10, 0); } valueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ } else if (((String)context).equals(ESCAPE_XML_HEX)) { @@ -288,7 +286,7 @@ public class CharsetCallback { if (length == 2) { valueStringLength += itou(valueString, valueStringLength, cp, 16, 0); } else { - valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 0); + valueStringLength += itou(valueString, valueStringLength, buffer[0], 16, 0); } valueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ } else if (((String)context).equals(ESCAPE_UNICODE)) { @@ -298,7 +296,7 @@ public class CharsetCallback { if (length == 2) { valueStringLength += itou(valueString, valueStringLength,cp, 16, 4); } else { - valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4); + valueStringLength += itou(valueString, valueStringLength, buffer[0], 16, 4); } valueString[valueStringLength++] = UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */ } else if (((String)context).equals(ESCAPE_CSS2)) { @@ -311,7 +309,7 @@ public class CharsetCallback { while (i < length) { valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */ - valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4); + valueStringLength += itou(valueString, valueStringLength, buffer[i++], 16, 4); } } } diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetISO2022.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetISO2022.java index d366cbe4180..02424154b38 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetISO2022.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetISO2022.java @@ -1,7 +1,7 @@ /* ******************************************************************************* - * Copyright (C) 2008-2011, International Business Machines Corporation and * - * others. All Rights Reserved. * + * Copyright (C) 2008-2014, International Business Machines Corporation and + * others. All Rights Reserved. ******************************************************************************* */ package com.ibm.icu.charset; @@ -271,13 +271,13 @@ class CharsetISO2022 extends CharsetICU { char[] table; int value; /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ - if (c >= 0x10000 && (sharedData.mbcs.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) { + if (c >= 0x10000 && !sharedData.mbcs.hasSupplementary()) { return 0; } /* convert the Unicode code point in c into codepage bytes */ table = sharedData.mbcs.fromUnicodeTable; /* get the byte for the output */ - value = CharsetMBCS.MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeBytes, c); + value = CharsetMBCS.MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeChars, c); /* get the byte for the output */ retval[0] = value & 0xff; if (value >= 0xf00) { diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetMBCS.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetMBCS.java index 06dfa14aa7b..f4c42f570b0 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetMBCS.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetMBCS.java @@ -54,6 +54,11 @@ class CharsetMBCS extends CharsetICU { final static class MBCSToUFallback { int offset; int codePoint; + + MBCSToUFallback(int off, int cp) { + offset = off; + codePoint = cp; + } } /** @@ -73,9 +78,14 @@ class CharsetMBCS extends CharsetICU { MBCSToUFallback toUFallbacks[/* countToUFallbacks */]; /* fromUnicode */ - char fromUnicodeTable[]; + char fromUnicodeTable[]; // stage1, and for MBCS_OUTPUT_1 also contains stage2 + int fromUnicodeTableInts[]; // stage1 and stage2 together as int[] + // Exactly one of the fromUnicode(Type) tables is not null, + // depending on the outputType. byte fromUnicodeBytes[]; - byte swapLFNLFromUnicodeBytes[]; /* for swaplfnl */ + char fromUnicodeChars[]; + int fromUnicodeInts[]; + char swapLFNLFromUnicodeChars[]; /* for swaplfnl */ int fromUBytesLength; short outputType, unicodeMask; @@ -88,17 +98,20 @@ class CharsetMBCS extends CharsetICU { ByteBuffer extIndexes; // create int[] view etc. as needed CharBuffer mbcsIndex; /* for fast conversion from most of BMP to MBCS (utf8Friendly data) */ - char sbcsIndex[/* SBCS_FAST_LIMIT>>6 */]; /* for fast conversion from low BMP to SBCS (utf8Friendly data) */ + // char sbcsIndex[/* SBCS_FAST_LIMIT>>6 */]; /* for fast conversion from low BMP to SBCS (utf8Friendly data) */ boolean utf8Friendly; /* for utf8Friendly data */ char maxFastUChar; /* for utf8Friendly data */ /* roundtrips */ - long asciiRoundtrips; + int asciiRoundtrips; UConverterMBCSTable() { utf8Friendly = false; mbcsIndex = null; - sbcsIndex = new char[SBCS_FAST_LIMIT>>6]; + } + + boolean hasSupplementary() { + return (unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) != 0; } /* @@ -106,7 +119,7 @@ class CharsetMBCS extends CharsetICU { * stateTableOwned = t.stateTableOwned; countToUFallbacks = t.countToUFallbacks; stateTable = t.stateTable; * swapLFNLStateTable = t.swapLFNLStateTable; unicodeCodeUnits = t.unicodeCodeUnits; toUFallbacks = * t.toUFallbacks; fromUnicodeTable = t.fromUnicodeTable; fromUnicodeBytes = t.fromUnicodeBytes; - * swapLFNLFromUnicodeBytes = t.swapLFNLFromUnicodeBytes; fromUBytesLength = t.fromUBytesLength; outputType = + * swapLFNLFromUnicodeChars = t.swapLFNLFromUnicodeChars; fromUBytesLength = t.fromUBytesLength; outputType = * t.outputType; unicodeMask = t.unicodeMask; swapLFNLName = t.swapLFNLName; baseSharedData = t.baseSharedData; * extIndexes = t.extIndexes; } */ @@ -193,15 +206,19 @@ class CharsetMBCS extends CharsetICU { UConverterStaticData staticData = new UConverterStaticData(); UConverterDataReader reader = null; try { - String resourceName = classPath + "/" + myName + "." + UConverterSharedData.DATA_TYPE; - InputStream i; + String itemName = myName + '.' + UConverterSharedData.DATA_TYPE; + String resourceName = classPath + '/' + itemName; + ByteBuffer b; if (loader != null) { - i = ICUData.getRequiredStream(loader, resourceName); + InputStream i = ICUData.getRequiredStream(loader, resourceName); + b = ICUBinary.getByteBufferFromInputStream(i); + } else if (!classPath.equals(ICUData.ICU_BUNDLE)) { + InputStream i = ICUData.getRequiredStream(resourceName); + b = ICUBinary.getByteBufferFromInputStream(i); } else { - i = ICUData.getRequiredStream(resourceName); + b = ICUBinary.getRequiredData(itemName); } - ByteBuffer b = ICUBinary.getByteBufferFromInputStream(i); reader = new UConverterDataReader(b); reader.readStaticData(staticData); } catch (IOException e) { @@ -235,11 +252,6 @@ class CharsetMBCS extends CharsetICU { int offset; // int[] extIndexesArray = null; String baseNameString = null; - int[][] stateTableArray = null; - MBCSToUFallback[] toUFallbacksArray = null; - char[] unicodeCodeUnitsArray = null; - char[] fromUnicodeTableArray = null; - byte[] fromUnicodeBytesArray = null; if (header.version[0] == 5 && header.version[1] >= 3 && (header.options & MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK) == 0) { noFromU = ((header.options & MBCS_OPT_NO_FROM_U) != 0); @@ -258,8 +270,7 @@ class CharsetMBCS extends CharsetICU { if (offset != 0) { // agljport:commment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null // terminator byte all already read; - mbcsTable.extIndexes = reader.readExtIndexes(offset - - (reader.bytesRead - reader.staticDataBytesRead)); + mbcsTable.extIndexes = reader.readExtIndexes(offset - reader.bytesReadAfterStaticData()); } } catch (IOException e) { throw new InvalidFormatException(); @@ -329,7 +340,7 @@ class CharsetMBCS extends CharsetICU { * for the extension converter separately when it is requested. */ mbcsTable.swapLFNLStateTable = null; - mbcsTable.swapLFNLFromUnicodeBytes = null; + mbcsTable.swapLFNLFromUnicodeChars = null; mbcsTable.swapLFNLName = null; /* @@ -412,50 +423,31 @@ class CharsetMBCS extends CharsetICU { throw new InvalidFormatException(); } - stateTableArray = new int[header.countStates][256]; - toUFallbacksArray = new MBCSToUFallback[header.countToUFallbacks]; - for (int i = 0; i < toUFallbacksArray.length; ++i) - toUFallbacksArray[i] = new MBCSToUFallback(); - unicodeCodeUnitsArray = new char[(header.offsetFromUTable - header.offsetToUCodeUnits) / 2]; - fromUnicodeTableArray = new char[(header.offsetFromUBytes - header.offsetFromUTable) / 2]; - fromUnicodeBytesArray = new byte[header.fromUBytesLength]; - try { - reader.readMBCSTable(stateTableArray, toUFallbacksArray, unicodeCodeUnitsArray, fromUnicodeTableArray, - fromUnicodeBytesArray); - } catch (IOException e) { - throw new InvalidFormatException(); - } - - mbcsTable.countStates = (byte) header.countStates; - mbcsTable.countToUFallbacks = header.countToUFallbacks; - mbcsTable.stateTable = stateTableArray; - mbcsTable.toUFallbacks = toUFallbacksArray; - mbcsTable.unicodeCodeUnits = unicodeCodeUnitsArray; - - mbcsTable.fromUnicodeTable = fromUnicodeTableArray; - mbcsTable.fromUnicodeBytes = fromUnicodeBytesArray; - mbcsTable.fromUBytesLength = header.fromUBytesLength; - /* * converter versions 6.1 and up contain a unicodeMask that is used here to select the most efficient * function implementations */ // agljport:fix info.size=sizeof(UDataInfo); // agljport:fix udata_getInfo((UDataMemory *)sharedData->dataMemory, &info); - // agljport:fix if(info.formatVersion[0]>6 || (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) { - /* mask off possible future extensions to be safe */ - mbcsTable.unicodeMask = (short) (staticData.unicodeMask & 3); - // agljport:fix } else { - /* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */ - // agljport:fix mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY|UCNV_HAS_SURROGATES; - // agljport:fix } + if (reader.dataFormatHasUnicodeMask()) { + /* mask off possible future extensions to be safe */ + mbcsTable.unicodeMask = (short) (staticData.unicodeMask & 3); + } else { + /* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */ + mbcsTable.unicodeMask = UConverterConstants.HAS_SUPPLEMENTARY | UConverterConstants.HAS_SURROGATES; + } + try { + reader.readMBCSTable(header, mbcsTable); + } catch (IOException e) { + throw new InvalidFormatException(); + } + if (offset != 0) { try { // agljport:commment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null // terminator byte all already read; // int namelen = baseNameString != null? baseNameString.length() + 1: 0; - mbcsTable.extIndexes = reader.readExtIndexes(offset - - (reader.bytesRead - reader.staticDataBytesRead)); + mbcsTable.extIndexes = reader.readExtIndexes(offset - reader.bytesReadAfterStaticData()); } catch (IOException e) { throw new InvalidFormatException(); } @@ -468,12 +460,13 @@ class CharsetMBCS extends CharsetICU { if (mbcsTable.countStates == 1) { /* * SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher. - * Build a table with indexes to each block, to be used instaed of + * Build a table with indexes to each block, to be used instead of * the regular stage 1/2 table. */ - for (int i = 0; i < (SBCS_FAST_LIMIT>>6); ++i) { - mbcsTable.sbcsIndex[i] = mbcsTable.fromUnicodeTable[mbcsTable.fromUnicodeTable[i>>4]+((i<<2)&0x3c)]; - } +// sbcsIndex = new char[SBCS_FAST_LIMIT>>6]; +// for (int i = 0; i < (SBCS_FAST_LIMIT>>6); ++i) { +// mbcsTable.sbcsIndex[i] = mbcsTable.fromUnicodeTable[mbcsTable.fromUnicodeTable[i>>4]+((i<<2)&0x3c)]; +// } /* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header.version[2]>(SBCS_FAST_MAX>>8) */ mbcsTable.maxFastUChar = SBCS_FAST_MAX; } else { @@ -481,23 +474,21 @@ class CharsetMBCS extends CharsetICU { * MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher. * The .cnv file is prebuilt with an additional stage table with indexes to each block. */ - if (noFromU) { - mbcsTable.mbcsIndex = ByteBuffer.wrap(mbcsTable.fromUnicodeBytes).asCharBuffer(); - } mbcsTable.maxFastUChar = (char)((header.version[2]<<8) | 0xff); } } /* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */ { - long asciiRoundtrips = 0xffffffff; + int asciiRoundtrips = 0xffffffff; for (int i = 0; i < 0x80; ++i) { if (mbcsTable.stateTable[0][i] != MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) { - asciiRoundtrips&=~((long)1<<(i>>2))&UConverterConstants.UNSIGNED_INT_MASK; + asciiRoundtrips &= ~(1 << (i >> 2)); } } - mbcsTable.asciiRoundtrips = asciiRoundtrips&UConverterConstants.UNSIGNED_INT_MASK; + mbcsTable.asciiRoundtrips = asciiRoundtrips; } - + // TODO: Use asciiRoundtrips to speed up conversion, like in ICU4C. + if (noFromU) { int stage1Length = (mbcsTable.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) != 0 ? 0x440 : 0x40; int stage2Length = (header.offsetFromUBytes - header.offsetFromUTable)/4 - stage1Length/2; @@ -511,9 +502,11 @@ class CharsetMBCS extends CharsetICU { mbcsTable.asciiRoundtrips = 0; } } + // TODO: Use mbcsIndex to speed up UTF-16 conversion, like in ICU4C. + mbcsTable.mbcsIndex = null; return data; } - + private static boolean writeStage3Roundtrip(UConverterMBCSTable mbcsTable, long value, int codePoints[]) { char[] table; byte[] bytes; @@ -524,7 +517,10 @@ class CharsetMBCS extends CharsetICU { long temp; table = mbcsTable.fromUnicodeTable; + int[] tableInts = mbcsTable.fromUnicodeTableInts; bytes = mbcsTable.fromUnicodeBytes; + char[] chars = mbcsTable.fromUnicodeChars; + int[] ints = mbcsTable.fromUnicodeInts; /* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */ switch(mbcsTable.outputType) { @@ -564,7 +560,7 @@ class CharsetMBCS extends CharsetICU { /* locate the stage 2 & 3 data */ stage2 = table[c>>10] + ((c>>4)&0x3f); - st3 = table[stage2*2]<<16|table[stage2*2 + 1]; + st3 = tableInts[stage2]; st3 = (int)(char)(st3 * 16 + (c&0xf)); /* write the codepage bytes into stage 3 */ @@ -577,54 +573,42 @@ class CharsetMBCS extends CharsetICU { bytes[p+2] = (byte)value; break; case MBCS_OUTPUT_4: - bytes[st3*4] = (byte)(value >> 24); - bytes[st3*4 + 1] = (byte)(value >> 16); - bytes[st3*4 + 2] = (byte)(value >> 8); - bytes[st3*4 + 3] = (byte)value; + ints[st3] = (int)value; break; default: /* 2 bytes per character */ - bytes[st3*2] = (byte)(value >> 8); - bytes[st3*2 + 1] = (byte)value; + chars[st3] = (char)value; break; } /* set the roundtrip flag */ temp = (1L<<(16+(c&0xf))); - table[stage2*2] |= (char)(temp>>16); - table[stage2*2 + 1] |= (char)temp; + tableInts[stage2] |= temp; } return true; } - - private static void reconstituteData(UConverterMBCSTable mbcsTable, int stage1Length, int stage2Length, int fullStage2Length) { - int datalength = stage1Length*2+fullStage2Length*4+mbcsTable.fromUBytesLength; - int offset = 0; - byte[] stage = new byte[datalength]; - - for (int i = 0; i < stage1Length; ++i) { - stage[i*2] = (byte)(mbcsTable.fromUnicodeTable[i]>>8); - stage[i*2+1] = (byte)(mbcsTable.fromUnicodeTable[i]); - } - - offset = ((fullStage2Length - stage2Length) * 4) + (stage1Length * 2); - for (int i = 0; i < stage2Length; ++i) { - stage[offset + i*4] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2]>>8); - stage[offset + i*4+1] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2]); - stage[offset + i*4+2] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2+1]>>8); - stage[offset + i*4+3] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2+1]); - } - - /* indexes into stage 2 count from the bottom of the fromUnicodeTable */ - - /* reconsitute the initial part of stage 2 from the mbcsIndex */ + + private static void reconstituteData(UConverterMBCSTable mbcsTable, + int stage1Length, int stage2Length, int fullStage2Length) { + char[] stage1 = mbcsTable.fromUnicodeTable; + + // stage2 starts with unused stage1 space. + // Indexes into stage 2 count from the bottom of the fromUnicodeTable. + int numStage1Ints = stage1Length / 2; // 2 chars = 1 int + int[] stage2 = new int[numStage1Ints + fullStage2Length]; + System.arraycopy(mbcsTable.fromUnicodeTableInts, numStage1Ints, + stage2, (fullStage2Length - stage2Length) + numStage1Ints, + stage2Length); + mbcsTable.fromUnicodeTableInts = stage2; + + /* reconstitute the initial part of stage 2 from the mbcsIndex */ { int stageUTF8Length=(mbcsTable.maxFastUChar+1)>>6; int stageUTF8Index=0; int st1, st2, st3, i; - + for (st1 = 0; stageUTF8Index < stageUTF8Length; ++st1) { - st2 = ((char)stage[2*st1]<<8) | (0xff & stage[2*st1+1]); + st2 = stage1[st1]; if (st2 != stage1Length/2) { /* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */ for (i = 0; i < 16; ++i) { @@ -636,10 +620,10 @@ class CharsetMBCS extends CharsetICU { * 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are * allocated together as a single 64-block for access from the mbcsIndex */ - stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3); st2++; st3++; - stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3); st2++; st3++; - stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3); st2++; st3++; - stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3); + stage2[st2++] = st3++; + stage2[st2++] = st3++; + stage2[st2++] = st3++; + stage2[st2++] = st3; } else { /* no stage 3 block, skip */ st2+=4; @@ -651,17 +635,25 @@ class CharsetMBCS extends CharsetICU { } } } - - char[] stage1 = new char[stage.length/2]; - for (int i = 0; i < stage1.length; ++i) { - stage1[i] = (char)(((stage[i*2])<<8)|(stage[i*2+1] & UConverterConstants.UNSIGNED_BYTE_MASK)); + + switch (mbcsTable.outputType) { + case CharsetMBCS.MBCS_OUTPUT_2: + case CharsetMBCS.MBCS_OUTPUT_2_SISO: + case CharsetMBCS.MBCS_OUTPUT_3_EUC: + mbcsTable.fromUnicodeChars = new char[mbcsTable.fromUBytesLength / 2]; + break; + case CharsetMBCS.MBCS_OUTPUT_3: + case CharsetMBCS.MBCS_OUTPUT_4_EUC: + mbcsTable.fromUnicodeBytes = new byte[mbcsTable.fromUBytesLength]; + break; + case CharsetMBCS.MBCS_OUTPUT_4: + mbcsTable.fromUnicodeInts = new int[mbcsTable.fromUBytesLength / 4]; + break; + default: + // Cannot occur, caller checked already. + assert false; } - byte[] stage2 = new byte[stage.length - ((stage1Length * 2) + (fullStage2Length * 4))]; - System.arraycopy(stage, ((stage1Length * 2) + (fullStage2Length * 4)), stage2, 0, stage2.length); - - mbcsTable.fromUnicodeTable = stage1; - mbcsTable.fromUnicodeBytes = stage2; - + /* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */ MBCSEnumToUnicode(mbcsTable); } @@ -786,7 +778,7 @@ class CharsetMBCS extends CharsetICU { } if (((++b)&0x1f) == 0) { if(anyCodePoints>=0) { - if(!writeStage3Roundtrip(mbcsTable, value|(b-0x20)&UConverterConstants.UNSIGNED_INT_MASK, codePoints)) { + if(!writeStage3Roundtrip(mbcsTable, value|(b-0x20), codePoints)) { return false; } anyCodePoints=-1; @@ -965,30 +957,26 @@ class CharsetMBCS extends CharsetICU { private boolean EBCDICSwapLFNL() throws Exception { UConverterMBCSTable mbcsTable; - + char[] table; - byte[] results; - byte[] bytes; - + int[][] newStateTable; - byte[] newResults; String newName; - + int stage2Entry; -// int size; - int sizeofFromUBytes; - + mbcsTable = sharedData.mbcs; table = mbcsTable.fromUnicodeTable; - bytes = mbcsTable.fromUnicodeBytes; - results = bytes; - + int[] tableInts = sharedData.mbcs.fromUnicodeTableInts; + char[] chars = mbcsTable.fromUnicodeChars; + char[] results = chars; + /* * Check that this is an EBCDIC table with SBCS portion - * SBCS or EBCDIC with standard EBCDIC LF and NL mappings. * - * If not, ignore the option Options are always ignored if they do not apply. + * If not, ignore the option. Options are always ignored if they do not apply. */ if (!((mbcsTable.outputType == MBCS_OUTPUT_1 || mbcsTable.outputType == MBCS_OUTPUT_2_SISO) && mbcsTable.stateTable[0][EBCDIC_LF] == MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) && @@ -1002,15 +990,15 @@ class CharsetMBCS extends CharsetICU { return false; } } else /* MBCS_OUTPUT_2_SISO */ { - stage2Entry = MBCS_STAGE_2_FROM_U(table, U_LF); + stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_LF); if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF) && - EBCDIC_LF == MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_LF))) { + EBCDIC_LF == MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, U_LF))) { return false; } - stage2Entry = MBCS_STAGE_2_FROM_U(table, U_NL); + stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_NL); if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL) && - EBCDIC_NL == MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_NL))) { + EBCDIC_NL == MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, U_NL))) { return false; } } @@ -1020,7 +1008,7 @@ class CharsetMBCS extends CharsetICU { * We _know_ the number of bytes in the fromUnicodeBytes array * starting with header.version 4.1. */ - sizeofFromUBytes = mbcsTable.fromUBytesLength; + // sizeofFromUBytes = mbcsTable.fromUBytesLength; } else { /* * Otherwise: @@ -1050,26 +1038,26 @@ class CharsetMBCS extends CharsetICU { newStateTable[0][EBCDIC_NL] = MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF); /* copy and modify the from-Unicode result table */ - newResults = new byte[sizeofFromUBytes]; - System.arraycopy(bytes, 0, newResults, 0, sizeofFromUBytes); + char[] newResults = new char[chars.length]; + System.arraycopy(chars, 0, newResults, 0, chars.length); /* conveniently, the table access macros work on the left side of expressions */ if (mbcsTable.outputType == MBCS_OUTPUT_1) { MBCS_SINGLE_RESULT_FROM_U_SET(table, newResults, U_LF, EBCDIC_RT_NL); MBCS_SINGLE_RESULT_FROM_U_SET(table, newResults, U_NL, EBCDIC_RT_LF); } else /* MBCS_OUTPUT_2_SISO */ { - stage2Entry = MBCS_STAGE_2_FROM_U(table, U_LF); + stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_LF); MBCS_VALUE_2_FROM_STAGE_2_SET(newResults, stage2Entry, U_LF, EBCDIC_NL); - stage2Entry = MBCS_STAGE_2_FROM_U(table, U_NL); + stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_NL); MBCS_VALUE_2_FROM_STAGE_2_SET(newResults, stage2Entry, U_NL, EBCDIC_LF); } /* set the canonical converter name */ newName = icuCanonicalName.concat(UConverterConstants.OPTION_SWAP_LFNL_STRING); - + if (mbcsTable.swapLFNLStateTable == null) { mbcsTable.swapLFNLStateTable = newStateTable; - mbcsTable.swapLFNLFromUnicodeBytes = newResults; + mbcsTable.swapLFNLFromUnicodeChars = newResults; mbcsTable.swapLFNLName = newName; } return true; @@ -1094,13 +1082,13 @@ class CharsetMBCS extends CharsetICU { /* GB 18030 data ------------------------------------------------------------ */ /* helper macros for linear values for GB 18030 four-byte sequences */ - private static long LINEAR_18030(long a, long b, long c, long d) { - return ((((a & 0xff) * 10 + (b & 0xff)) * 126L + (c & 0xff)) * 10L + (d & 0xff)); + private static int LINEAR_18030(int a, int b, int c, int d) { + return ((((a & 0xff) * 10 + (b & 0xff)) * 126 + (c & 0xff)) * 10 + (d & 0xff)); } - private static long LINEAR_18030_BASE = LINEAR_18030(0x81, 0x30, 0x81, 0x30); + private static int LINEAR_18030_BASE = LINEAR_18030(0x81, 0x30, 0x81, 0x30); - private static long LINEAR(long x) { + private static int LINEAR(int x) { return LINEAR_18030(x >>> 24, (x >>> 16) & 0xff, (x >>> 8) & 0xff, x & 0xff); } @@ -1111,21 +1099,21 @@ class CharsetMBCS extends CharsetICU { * * Note that single surrogates are not mapped by GB 18030 as of the re-released mapping tables from 2000-nov-30. */ - private static final long gb18030Ranges[][] = new long[/* 14 */][/* 4 */] { - { 0x10000L, 0x10FFFFL, LINEAR(0x90308130L), LINEAR(0xE3329A35L) }, - { 0x9FA6L, 0xD7FFL, LINEAR(0x82358F33L), LINEAR(0x8336C738L) }, - { 0x0452L, 0x1E3EL, LINEAR(0x8130D330L), LINEAR(0x8135F436L) }, - { 0x1E40L, 0x200FL, LINEAR(0x8135F438L), LINEAR(0x8136A531L) }, - { 0xE865L, 0xF92BL, LINEAR(0x8336D030L), LINEAR(0x84308534L) }, - { 0x2643L, 0x2E80L, LINEAR(0x8137A839L), LINEAR(0x8138FD38L) }, - { 0xFA2AL, 0xFE2FL, LINEAR(0x84309C38L), LINEAR(0x84318537L) }, - { 0x3CE1L, 0x4055L, LINEAR(0x8231D438L), LINEAR(0x8232AF32L) }, - { 0x361BL, 0x3917L, LINEAR(0x8230A633L), LINEAR(0x8230F237L) }, - { 0x49B8L, 0x4C76L, LINEAR(0x8234A131L), LINEAR(0x8234E733L) }, - { 0x4160L, 0x4336L, LINEAR(0x8232C937L), LINEAR(0x8232F837L) }, - { 0x478EL, 0x4946L, LINEAR(0x8233E838L), LINEAR(0x82349638L) }, - { 0x44D7L, 0x464BL, LINEAR(0x8233A339L), LINEAR(0x8233C931L) }, - { 0xFFE6L, 0xFFFFL, LINEAR(0x8431A234L), LINEAR(0x8431A439L) } }; + private static final int gb18030Ranges[][] = new int[/* 14 */][/* 4 */] { + { 0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35) }, + { 0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738) }, + { 0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436) }, + { 0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531) }, + { 0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534) }, + { 0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38) }, + { 0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537) }, + { 0x3CE1, 0x4055, LINEAR(0x8231D438), LINEAR(0x8232AF32) }, + { 0x361B, 0x3917, LINEAR(0x8230A633), LINEAR(0x8230F237) }, + { 0x49B8, 0x4C76, LINEAR(0x8234A131), LINEAR(0x8234E733) }, + { 0x4160, 0x4336, LINEAR(0x8232C937), LINEAR(0x8232F837) }, + { 0x478E, 0x4946, LINEAR(0x8233E838), LINEAR(0x82349638) }, + { 0x44D7, 0x464B, LINEAR(0x8233A339), LINEAR(0x8233C931) }, + { 0xFFE6, 0xFFFF, LINEAR(0x8431A234), LINEAR(0x8431A439) } }; /* bit flag for UConverter.options indicating GB 18030 special handling */ private static final int MBCS_OPTION_GB18030 = 0x8000; @@ -1270,57 +1258,51 @@ class CharsetMBCS extends CharsetICU { * single-state codepages that only map to and from BMP code points, and it always returns fallback values. */ static char MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(UConverterMBCSTable mbcs, final int b) { - return MBCS_ENTRY_FINAL_VALUE_16(mbcs.stateTable[0][b & UConverterConstants.UNSIGNED_BYTE_MASK]); + assert 0 <= b && b <= 0xff; + return MBCS_ENTRY_FINAL_VALUE_16(mbcs.stateTable[0][b]); } /* single-byte fromUnicode: get the 16-bit result word */ - static char MBCS_SINGLE_RESULT_FROM_U(char[] table, byte[] results, int c) { + static char MBCS_SINGLE_RESULT_FROM_U(char[] table, char[] results, int c) { int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f); - int i = 2 * (table[i1] + (c & 0xf)); // used as index into byte[] array treated as char[] array - return (char) (((results[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (results[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK)); + int i = table[i1] + (c & 0xf); + return results[i]; } /* single-byte fromUnicode: set the 16-bit result word with newValue*/ - static void MBCS_SINGLE_RESULT_FROM_U_SET(char[] table, byte[] results, int c, int newValue) { + static void MBCS_SINGLE_RESULT_FROM_U_SET(char[] table, char[] results, int c, int newValue) { int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f); - int i = 2 * (table[i1] + (c & 0xf)); // used as index into byte[] array treated as char[] array - results[i] = (byte)((newValue >> 8) & UConverterConstants.UNSIGNED_BYTE_MASK); - results[i + 1] = (byte)(newValue & UConverterConstants.UNSIGNED_BYTE_MASK); + int i = table[i1] + (c & 0xf); + results[i] = (char) newValue; } /* multi-byte fromUnicode: get the 32-bit stage 2 entry */ - static int MBCS_STAGE_2_FROM_U(char[] table, int c) { - int i = 2 * (table[(c) >>> 10] + ((c >>> 4) & 0x3f)); // 2x because used as index into char[] array treated as - // int[] array - return ((table[i] & UConverterConstants.UNSIGNED_SHORT_MASK) << 16) - | (table[i + 1] & UConverterConstants.UNSIGNED_SHORT_MASK); + static int MBCS_STAGE_2_FROM_U(char[] table, int[] tableInts, int c) { + int i = table[(c) >>> 10] + ((c >>> 4) & 0x3f); + return tableInts[i]; } private static boolean MBCS_FROM_U_IS_ROUNDTRIP(int stage2Entry, int c) { return (((stage2Entry) & (1 << (16 + ((c) & 0xf)))) != 0); } - static char MBCS_VALUE_2_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) { - int i = 2 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf)); - return (char) (((bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (bytes[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK)); - } - - static void MBCS_VALUE_2_FROM_STAGE_2_SET(byte[] bytes, int stage2Entry, int c, int newValue) { - int i = 2 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf)); - bytes[i] = (byte)((newValue >> 8) & UConverterConstants.UNSIGNED_BYTE_MASK); - bytes[i + 1] = (byte)(newValue & UConverterConstants.UNSIGNED_BYTE_MASK); + static char MBCS_VALUE_2_FROM_STAGE_2(char[] chars, int stage2Entry, int c) { + int i = 16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf); + return chars[i]; } - private static int MBCS_VALUE_4_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) { - int i = 4 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf)); - return ((bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 24) - | ((bytes[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16) - | ((bytes[i + 2] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) - | (bytes[i + 3] & UConverterConstants.UNSIGNED_BYTE_MASK); + static void MBCS_VALUE_2_FROM_STAGE_2_SET(char[] chars, int stage2Entry, int c, int newValue) { + int i = 16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf); + chars[i] = (char) newValue; + } + + private static int MBCS_VALUE_4_FROM_STAGE_2(int[] ints, int stage2Entry, int c) { + int i = 16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf); + return ints[i]; } static int MBCS_POINTER_3_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) { - return ((16 * ((char) (stage2Entry) & UConverterConstants.UNSIGNED_SHORT_MASK) + ((c) & 0xf)) * 3); + return ((16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + ((c) & 0xf)) * 3); } // ------------UConverterExt------------------------------------------------------- @@ -1384,7 +1366,7 @@ class CharsetMBCS extends CharsetICU { } static boolean TO_U_IS_PARTIAL(int value) { - return (value & UConverterConstants.UNSIGNED_INT_MASK) < TO_U_MIN_CODE_POINT; + return 0 <= value && value < TO_U_MIN_CODE_POINT; } static int TO_U_GET_PARTIAL_INDEX(int value) { @@ -1396,16 +1378,19 @@ class CharsetMBCS extends CharsetICU { } private static int TO_U_MAKE_WORD(byte b, int value) { - return ((b & UConverterConstants.UNSIGNED_BYTE_MASK) << TO_U_BYTE_SHIFT) | value; + // TO_U_BYTE_SHIFT == 24: safe to just shift the signed byte-as-int. + return (b << TO_U_BYTE_SHIFT) | value; } /* use after masking off the roundtrip flag */ static boolean TO_U_IS_CODE_POINT(int value) { - return (value & UConverterConstants.UNSIGNED_INT_MASK) <= TO_U_MAX_CODE_POINT; + assert value >= 0; + return value <= TO_U_MAX_CODE_POINT; } static int TO_U_GET_CODE_POINT(int value) { - return (int) ((value & UConverterConstants.UNSIGNED_INT_MASK) - TO_U_MIN_CODE_POINT); + assert value >= 0; + return value - TO_U_MIN_CODE_POINT; } private static int TO_U_GET_INDEX(int value) { @@ -1474,6 +1459,9 @@ class CharsetMBCS extends CharsetICU { int oldpos = indexes.position(); Buffer b; + // TODO: It is very inefficient to create Buffer objects for each array access. + // We should create an inner class Extensions (or sibling class CharsetMBCSExtensions) + // which has buffers for the arrays, together with the code that works with them. indexes.position(indexes.getInt(index << 2)); if (itemType == int.class) b = indexes.asIntBuffer(); @@ -1900,8 +1888,8 @@ class CharsetMBCS extends CharsetICU { /* GB 18030 */ if (length == 4 && (options & MBCS_OPTION_GB18030) != 0) { - long[] range; - long linear; + int[] range; + int linear; int i; linear = LINEAR_18030(toUBytesArray[0], toUBytesArray[1], toUBytesArray[2], toUBytesArray[3]); @@ -1915,7 +1903,7 @@ class CharsetMBCS extends CharsetICU { linear = range[0] + (linear - range[2]); /* output this code point */ - cr[0] = toUWriteCodePoint((int) linear, target, offsets, sourceIndex); + cr[0] = toUWriteCodePoint(linear, target, offsets, sourceIndex); return 0; } @@ -2045,7 +2033,7 @@ class CharsetMBCS extends CharsetICU { } if (sharedData.mbcs.countStates == 1) { - if ((sharedData.mbcs.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) { + if (!sharedData.mbcs.hasSupplementary()) { cr[0] = cnvMBCSSingleToBMPWithOffsets(source, target, offsets, flush); } else { cr[0] = cnvMBCSSingleToUnicodeWithOffsets(source, target, offsets, flush); @@ -2818,7 +2806,7 @@ class CharsetMBCS extends CharsetICU { for (b = 0; b <= 0xff; b++) { entry = row[b]; if (MBCS_ENTRY_IS_TRANSITION(entry) && - hasValidTrailBytes(stateTable, (short)(MBCS_ENTRY_TRANSITION_STATE(entry) & UConverterConstants.UNSIGNED_BYTE_MASK))) { + hasValidTrailBytes(stateTable, (short)MBCS_ENTRY_TRANSITION_STATE(entry))) { return true; } } @@ -2829,9 +2817,9 @@ class CharsetMBCS extends CharsetICU { int[] row = stateTable[state]; int entry = row[b]; if (MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */ - return hasValidTrailBytes(stateTable, (short)(MBCS_ENTRY_TRANSITION_STATE(entry) & UConverterConstants.UNSIGNED_BYTE_MASK)); + return hasValidTrailBytes(stateTable, (short)MBCS_ENTRY_TRANSITION_STATE(entry)); } else { - short action = (short)(MBCS_ENTRY_FINAL_ACTION(entry) & UConverterConstants.UNSIGNED_BYTE_MASK); + int action = MBCS_ENTRY_FINAL_ACTION(entry); if (action == MBCS_STATE_CHANGE_ONLY && isDBCSOnly) { return false; /* SI/SO are illegal for DBCS-only conversion */ } else { @@ -2866,6 +2854,8 @@ class CharsetMBCS extends CharsetICU { int sourceArrayIndex; char[] table; byte[] pArray, bytes; + char[] chars; + int[] ints; int pArrayIndex, outputType, c; int prevSourceIndex, sourceIndex, nextSourceIndex; int stage2Entry = 0, value = 0, length = 0, prevLength; @@ -2908,12 +2898,15 @@ class CharsetMBCS extends CharsetICU { } table = sharedData.mbcs.fromUnicodeTable; + int[] tableInts = sharedData.mbcs.fromUnicodeTableInts; sourceArrayIndex = source.position(); + bytes = sharedData.mbcs.fromUnicodeBytes; + ints = sharedData.mbcs.fromUnicodeInts; if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) { - bytes = sharedData.mbcs.swapLFNLFromUnicodeBytes; + chars = sharedData.mbcs.swapLFNLFromUnicodeChars; } else { - bytes = sharedData.mbcs.fromUnicodeBytes; + chars = sharedData.mbcs.fromUnicodeChars; } // asciiRoundtrips = sharedData.mbcs.asciiRoundtrips; @@ -3054,7 +3047,7 @@ class CharsetMBCS extends CharsetICU { * byte may be output if the "assigned" bit in stage 2 was on. The data structure does not * support zero byte output as a fallback, and also does not allow output of leading zeros. */ - stage2Entry = MBCS_STAGE_2_FROM_U(table, c); + stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, c); /* get the bytes and the length for the output */ switch (outputType) { @@ -3078,8 +3071,8 @@ class CharsetMBCS extends CharsetICU { * callback function changed it for its output. */ fromUnicodeStatus = prevLength; /* save the old state */ - value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); - if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) { + value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c); + if (value <= 0xff) { if (value == 0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) == false) { /* no mapping, leave value==0 */ length = 0; @@ -3116,8 +3109,8 @@ class CharsetMBCS extends CharsetICU { break; case MBCS_OUTPUT_DBCS_ONLY: /* table with single-byte results, but only DBCS mappings used */ - value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); - if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) { + value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c); + if (value <= 0xff) { /* no mapping or SBCS result, not taken for DBCS-only */ value = stage2Entry = 0; /* stage2Entry=0 to reset roundtrip flags */ length = 0; @@ -3131,30 +3124,33 @@ class CharsetMBCS extends CharsetICU { value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16) | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK); - if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) { + if (value <= 0xff) { length = 1; - } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) { + } else if (value <= 0xffff) { length = 2; } else { length = 3; } break; case MBCS_OUTPUT_4: - value = MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c); - if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) { + value = MBCS_VALUE_4_FROM_STAGE_2(ints, stage2Entry, c); + if (value < 0) { + // Half of the 4-byte values look negative in a signed int. + length = 4; + } else if (value <= 0xff) { length = 1; - } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) { + } else if (value <= 0xffff) { length = 2; - } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffffff) { + } else if (value <= 0xffffff) { length = 3; } else { length = 4; } break; case MBCS_OUTPUT_3_EUC: - value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); + value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c); /* EUC 16-bit fixed-length representation */ - if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) { + if (value <= 0xff) { length = 1; } else if ((value & 0x8000) == 0) { value |= 0x8e8000; @@ -3173,9 +3169,9 @@ class CharsetMBCS extends CharsetICU { | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK); /* EUC 16-bit fixed-length representation applied to the first two bytes */ - if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) { + if (value <= 0xff) { length = 1; - } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) { + } else if (value <= 0xffff) { length = 2; } else if ((value & 0x800000) == 0) { value |= 0x8e800000; @@ -3396,24 +3392,25 @@ class CharsetMBCS extends CharsetICU { int p; /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ - if (c <= 0xffff || ((sharedData.mbcs.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) != 0)) { + if (c <= 0xffff || sharedData.mbcs.hasSupplementary()) { table = sharedData.mbcs.fromUnicodeTable; /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */ if (sharedData.mbcs.outputType == MBCS_OUTPUT_1) { - value = MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeBytes, c); + value = MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeChars, c); /* is this code point assigned, or do we use fallbacks? */ if (isUseFallback ? value >= 0x800 : value >= 0xc00) { pValue[0] = value & 0xff; return 1; } } else /* outputType!=MBCS_OUTPUT_1 */{ - stage2Entry = MBCS_STAGE_2_FROM_U(table, c); + int[] tableInts = sharedData.mbcs.fromUnicodeTableInts; + stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, c); /* get the bytes and the length for the output */ switch (sharedData.mbcs.outputType) { case MBCS_OUTPUT_2: - value = MBCS_VALUE_2_FROM_STAGE_2(sharedData.mbcs.fromUnicodeBytes, stage2Entry, c); + value = MBCS_VALUE_2_FROM_STAGE_2(sharedData.mbcs.fromUnicodeChars, stage2Entry, c); if (value <= 0xff) { length = 1; } else { @@ -3883,28 +3880,27 @@ class CharsetMBCS extends CharsetICU { * @return if(U_FAILURE) return the code point for cnv->fromUChar32 else return 0 after output has been written * to the target */ - private int fromU(int cp_, CharBuffer source, ByteBuffer target, IntBuffer offsets, int sourceIndex, + private int fromU(int cp, CharBuffer source, ByteBuffer target, IntBuffer offsets, int sourceIndex, int length, boolean flush, CoderResult[] cr) { // ByteBuffer cx; - long cp = cp_ & UConverterConstants.UNSIGNED_INT_MASK; useSubChar1 = false; if (sharedData.mbcs.extIndexes != null - && initialMatchFromU((int) cp, source, target, offsets, sourceIndex, flush, cr)) { + && initialMatchFromU(cp, source, target, offsets, sourceIndex, flush, cr)) { return 0; /* an extension mapping handled the input */ } /* GB 18030 */ if ((options & MBCS_OPTION_GB18030) != 0) { - long[] range; + int[] range; int i; for (i = 0; i < gb18030Ranges.length; ++i) { range = gb18030Ranges[i]; if (range[0] <= cp && cp <= range[1]) { /* found the Unicode code point, output the four-byte sequence for it */ - long linear; + int linear; byte bytes[] = new byte[4]; /* get the linear value of the first GB 18030 code in this range */ @@ -3996,7 +3992,7 @@ class CharsetMBCS extends CharsetICU { int sourceArrayIndex, lastSource; int targetCapacity, length; char[] table; - byte[] results; + char[] results; int c, sourceIndex; char value, minValue; @@ -4007,12 +4003,9 @@ class CharsetMBCS extends CharsetICU { table = sharedData.mbcs.fromUnicodeTable; if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) { - results = sharedData.mbcs.swapLFNLFromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes - // be a ByteBuffer so results can be a 16-bit view - // of it? + results = sharedData.mbcs.swapLFNLFromUnicodeChars; } else { - results = sharedData.mbcs.fromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes be a - // ByteBuffer so results can be a 16-bit view of it? + results = sharedData.mbcs.fromUnicodeChars; } if (useFallback) { @@ -4164,7 +4157,7 @@ class CharsetMBCS extends CharsetICU { int sourceArrayIndex; char[] table; - byte[] results; // agljport:comment results is used to to get 16-bit values out of byte[] array + char[] results; int c; int sourceIndex, nextSourceIndex; @@ -4178,12 +4171,9 @@ class CharsetMBCS extends CharsetICU { table = sharedData.mbcs.fromUnicodeTable; if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) { - results = sharedData.mbcs.swapLFNLFromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes - // be a ByteBuffer so results can be a 16-bit view - // of it? + results = sharedData.mbcs.swapLFNLFromUnicodeChars; } else { - results = sharedData.mbcs.fromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes be a - // ByteBuffer so results can be a 16-bit view of it? + results = sharedData.mbcs.fromUnicodeChars; } if (useFallback) { @@ -4316,7 +4306,7 @@ class CharsetMBCS extends CharsetICU { int sourceArrayIndex; char[] table; - byte[] bytes; + char[] chars; int c, sourceIndex, nextSourceIndex; @@ -4332,11 +4322,12 @@ class CharsetMBCS extends CharsetICU { sourceArrayIndex = source.position(); table = sharedData.mbcs.fromUnicodeTable; + int[] tableInts = sharedData.mbcs.fromUnicodeTableInts; if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) { - bytes = sharedData.mbcs.swapLFNLFromUnicodeBytes; + chars = sharedData.mbcs.swapLFNLFromUnicodeChars; } else { - bytes = sharedData.mbcs.fromUnicodeBytes; + chars = sharedData.mbcs.fromUnicodeChars; } /* get the converter state from UConverter */ @@ -4413,12 +4404,12 @@ class CharsetMBCS extends CharsetICU { } /* convert the Unicode code point in c into codepage bytes */ - stage2Entry = MBCS_STAGE_2_FROM_U(table, c); + stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, c); /* get the bytes and the length for the output */ /* MBCS_OUTPUT_2 */ - value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); - if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) { + value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c); + if (value <= 0xff) { length = 1; } else { length = 2; @@ -4780,7 +4771,7 @@ class CharsetMBCS extends CharsetICU { mbcsTable = data.mbcs; table = mbcsTable.fromUnicodeTable; - if((mbcsTable.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY)!=0){ + if(mbcsTable.hasSupplementary()){ maxStage1 = 0x440; } else{ @@ -4791,9 +4782,8 @@ class CharsetMBCS extends CharsetICU { if(mbcsTable.outputType==MBCS_OUTPUT_1){ char stage2, stage3; char minValue; - CharBuffer results; - results = ByteBuffer.wrap(mbcsTable.fromUnicodeBytes).asCharBuffer(); - + char[] results = mbcsTable.fromUnicodeChars; + if(which==ROUNDTRIP_SET) { /* use only roundtrips */ minValue=0xf00; @@ -4811,10 +4801,9 @@ class CharsetMBCS extends CharsetICU { /*read the stage 3 block */ stage3 = (char)st3; do { - if(results.get(stage3++)>=minValue){ + if(results[stage3++]>=minValue){ setFillIn.add(c); } - }while((++c&0xf) !=0); } else { c+= 16; /*empty stage 2 block */ @@ -4825,12 +4814,15 @@ class CharsetMBCS extends CharsetICU { } } } else { + int[] tableInts = mbcsTable.fromUnicodeTableInts; int stage2,stage3; byte[] bytes; int st3Multiplier; int value; boolean useFallBack; bytes = mbcsTable.fromUnicodeBytes; + char[] chars = mbcsTable.fromUnicodeChars; + int[] ints = mbcsTable.fromUnicodeInts; useFallBack = (which == ROUNDTRIP_AND_FALLBACK_SET); switch(mbcsTable.outputType) { case MBCS_OUTPUT_3: @@ -4844,49 +4836,41 @@ class CharsetMBCS extends CharsetICU { st3Multiplier =2; break; } - //ByteBuffer buffer = (ByteBuffer)charTobyte(table); - + for(st1=0;st1(maxStage1>>1)){ stage2 = st2 ; - for(st2=0;st2<128;++st2){ + for(st2=0;st2<64;++st2){ /*read the stage 3 block */ - st3 = table[stage2*2 + st2]<<16; - st3+=table[stage2*2 + ++st2]; + st3 = tableInts[stage2 + st2]; if(st3!=0){ //if((st3=table[stage2+st2])!=0){ stage3 = st3Multiplier*16*(st3&UConverterConstants.UNSIGNED_SHORT_MASK); - + /* get the roundtrip flags for the stage 3 block */ - st3>>=16; - st3 &= UConverterConstants.UNSIGNED_SHORT_MASK; + st3>>>=16; switch(filter) { case UCNV_SET_FILTER_NONE: do { - if((st3&1)!=0){ setFillIn.add(c); - stage3+=st3Multiplier; }else if (useFallBack) { - - char b =0; + int b =0; switch(st3Multiplier) { - case 4 : - - b|= ByteBuffer.wrap(bytes).getChar(stage3++); - - case 3 : - - b|= ByteBuffer.wrap(bytes).getChar(stage3++); - - case 2 : - - b|= ByteBuffer.wrap(bytes).getChar(stage3) | ByteBuffer.wrap(bytes).getChar(stage3+1); - stage3+=2; + case 4: + b = ints[stage3 / 4]; + break; + case 3: + b |= bytes[stage3] | bytes[stage3 + 1] | bytes[stage3 + 2]; + break; + case 2: + b = chars[stage3 / 2]; + break; default: break; } + stage3+=st3Multiplier; if(b!=0) { setFillIn.add(c); } @@ -4897,8 +4881,7 @@ class CharsetMBCS extends CharsetICU { case UCNV_SET_FILTER_DBCS_ONLY: /* Ignore single bytes results (<0x100). */ do { - if(((st3&1) != 0 || useFallBack) && - (UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))) >= 0x100){ + if(((st3&1) != 0 || useFallBack) && chars[stage3 / 2] >= 0x100){ setFillIn.add(c); } st3>>=1; @@ -4909,7 +4892,7 @@ class CharsetMBCS extends CharsetICU { /* only add code points that map to CNS 11643 planes 1&2 for non-EXT ISO-2202-CN. */ do { if(((st3&1) != 0 || useFallBack) && - ((value= (UConverterConstants.UNSIGNED_BYTE_MASK & (ByteBuffer.wrap(bytes).get(stage3))))==0x81 || value==0x82) ){ + ((value= (UConverterConstants.UNSIGNED_BYTE_MASK & bytes[stage3]))==0x81 || value==0x82) ){ setFillIn.add(c); } st3>>=1; @@ -4919,8 +4902,7 @@ class CharsetMBCS extends CharsetICU { case UCNV_SET_FILTER_SJIS: /* only add code points that map tp Shift-JIS codes corrosponding to JIS X 0280. */ do{ - - if(((st3&1) != 0 || useFallBack) && (value=(UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))))>=0x8140 && value<=0xeffc){ + if(((st3&1) != 0 || useFallBack) && (value=chars[stage3 / 2])>=0x8140 && value<=0xeffc){ setFillIn.add(c); } st3>>=1; @@ -4931,7 +4913,7 @@ class CharsetMBCS extends CharsetICU { /* only add code points that maps to ISO 2022 GR 94 DBCS codes*/ do { if(((st3&1) != 0 || useFallBack) && - (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=(UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))))- 0xa1a1))<=(0xfefe - 0xa1a1) && + (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=chars[stage3 / 2])- 0xa1a1))<=(0xfefe - 0xa1a1) && (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1)) <= (0xfe - 0xa1)){ setFillIn.add(c); } @@ -4943,7 +4925,7 @@ class CharsetMBCS extends CharsetICU { /*Only add code points that are suitable for HZ DBCS*/ do { if( ((st3&1) != 0 || useFallBack) && - (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=(UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))))-0xa1a1))<=(0xfdfe - 0xa1a1) && + (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=chars[stage3 / 2])-0xa1a1))<=(0xfdfe - 0xa1a1) && (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1)) <= (0xfe - 0xa1)){ setFillIn.add(c); } @@ -5056,7 +5038,7 @@ class CharsetMBCS extends CharsetICU { if(st3!= 0){ ps3 = st3; do { - value = stage3b.get(UConverterConstants.UNSIGNED_SHORT_MASK&stage3.get(ps3++)); + value = stage3b.get(stage3.get(ps3++)); if(value==0){ /* no mapping do nothing */ }else if (FROM_U_IS_PARTIAL(value)){ @@ -5078,14 +5060,13 @@ class CharsetMBCS extends CharsetICU { } break; case UCNV_SET_FILTER_GR94DBCS: - if(!(FROM_U_GET_LENGTH(value)==2 && (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=FROM_U_GET_DATA(value)) - 0xa1a1))<=(0xfefe - 0xa1a1) + if(!(FROM_U_GET_LENGTH(value)==2 && ((value=FROM_U_GET_DATA(value)) - 0xa1a1)<=(0xfefe - 0xa1a1) && (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1))<= (0xfe - 0xa1))){ - continue; } break; case UCNV_SET_FILTER_HZ: - if(!(FROM_U_GET_LENGTH(value)==2 && (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=FROM_U_GET_DATA(value)) - 0xa1a1))<=(0xfdfe - 0xa1a1) + if(!(FROM_U_GET_LENGTH(value)==2 && ((value=FROM_U_GET_DATA(value)) - 0xa1a1)<=(0xfdfe - 0xa1a1) && (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1))<= (0xfe - 0xa1))){ continue; } diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterAlias.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterAlias.java index 4bc4921cac9..2d53b887e79 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterAlias.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterAlias.java @@ -10,12 +10,9 @@ package com.ibm.icu.charset; import java.io.IOException; -import java.io.InputStream; import java.nio.ByteBuffer; import com.ibm.icu.impl.ICUBinary; -import com.ibm.icu.impl.ICUData; -import com.ibm.icu.impl.ICUResourceBundle; final class UConverterAlias { static final int UNNORMALIZED = 0; @@ -115,13 +112,12 @@ final class UConverterAlias { return (alias.length() != 0); } - private static final String CNVALIAS_DATA_FILE_NAME = ICUResourceBundle.ICU_BUNDLE + "/cnvalias.icu"; + private static final String CNVALIAS_DATA_FILE_NAME = "cnvalias.icu"; private static final synchronized boolean haveAliasData() throws IOException{ boolean needInit; - // agljport:todo umtx_lock(NULL); needInit = gAliasData == null; /* load converter alias data from file if necessary */ @@ -129,10 +125,8 @@ final class UConverterAlias { ByteBuffer data = null; int[] tableArray = null; int tableStart; - //byte[] reservedBytes = null; - InputStream i = ICUData.getRequiredStream(CNVALIAS_DATA_FILE_NAME); - ByteBuffer b = ICUBinary.getByteBufferFromInputStream(i); + ByteBuffer b = ICUBinary.getRequiredData(CNVALIAS_DATA_FILE_NAME); UConverterAliasDataReader reader = new UConverterAliasDataReader(b); tableArray = reader.readToc(offsetsCount); @@ -160,21 +154,10 @@ final class UConverterAlias { if (gOptionTable[0] != STD_NORMALIZED) { throw new IOException("Unsupported alias normalization"); } - - // agljport:todo umtx_lock(NULL); + if (gAliasData == null) { gAliasData = data; data = null; - - // agljport:fix ucln_common_registerCleanup(UCLN_COMMON_IO, - // io_cleanup); - } - // agljport:todo umtx_unlock(NULL); - - /* if a different thread set it first, then close the extra data */ - if (data != null) { - // agljport:fix udata_close(data); /* NULL if it was set - // correctly */ } } diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java index 32d8e4e9537..ded61b68fdb 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java @@ -9,8 +9,14 @@ package com.ibm.icu.charset; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.IntBuffer; +import com.ibm.icu.charset.CharsetMBCS.MBCSHeader; +import com.ibm.icu.charset.CharsetMBCS.MBCSToUFallback; +import com.ibm.icu.charset.CharsetMBCS.UConverterMBCSTable; import com.ibm.icu.impl.ICUBinary; +import com.ibm.icu.impl.InvalidFormatException; /** * ucnvmbcs.h @@ -395,9 +401,17 @@ import com.ibm.icu.impl.ICUBinary; * Indexes and lengths stored in the fromUTableValues[]. */ -final class UConverterDataReader implements ICUBinary.Authenticate { +final class UConverterDataReader { //private final static boolean debug = ICUDebug.enabled("UConverterDataReader"); + private static final class IsAcceptable implements ICUBinary.Authenticate { + // @Override when we switch to Java 6 + public boolean isDataVersionAcceptable(byte formatVersion[]) { + return formatVersion[0] == 6; + } + } + private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable(); + /* * UConverterDataReader(UConverterDataReader r) { @@ -405,10 +419,8 @@ final class UConverterDataReader implements ICUBinary.Authenticate { unicodeVersion = r.unicodeVersion; } */ - /* the number bytes read from the buffer */ - int bytesRead = 0; - /* the number of bytes read for static data */ - int staticDataBytesRead = 0; + /** The buffer position after the static data. */ + private int posAfterStaticData; /** *

Protected constructor.

@@ -420,7 +432,7 @@ final class UConverterDataReader implements ICUBinary.Authenticate { //if(debug) System.out.println("Bytes in buffer " + bytes.remaining()); byteBuffer = bytes; - /*unicodeVersion = */ICUBinary.readHeader(byteBuffer, DATA_FORMAT_ID, this); + /*unicodeVersion = */ICUBinary.readHeader(byteBuffer, DATA_FORMAT_ID, IS_ACCEPTABLE); //if(debug) System.out.println("Bytes left in byteBuffer " + byteBuffer.remaining()); } @@ -429,95 +441,137 @@ final class UConverterDataReader implements ICUBinary.Authenticate { protected void readStaticData(UConverterStaticData sd) throws IOException { - int bRead = 0; sd.structSize = byteBuffer.getInt(); - bRead +=4; byte[] name = new byte[UConverterConstants.MAX_CONVERTER_NAME_LENGTH]; byteBuffer.get(name); - bRead +=name.length; - sd.name = new String(name, 0, name.length); + sd.name = new String(name, "US-ASCII"); sd.codepage = byteBuffer.getInt(); - bRead +=4; sd.platform = byteBuffer.get(); - bRead++; sd.conversionType = byteBuffer.get(); - bRead++; sd.minBytesPerChar = byteBuffer.get(); - bRead++; sd.maxBytesPerChar = byteBuffer.get(); - bRead++; byteBuffer.get(sd.subChar); - bRead += sd.subChar.length; sd.subCharLen = byteBuffer.get(); - bRead++; sd.hasToUnicodeFallback = byteBuffer.get(); - bRead++; sd.hasFromUnicodeFallback = byteBuffer.get(); - bRead++; sd.unicodeMask = (short)(byteBuffer.get() & 0xff); - bRead++; sd.subChar1 = byteBuffer.get(); - bRead++; byteBuffer.get(sd.reserved); - bRead += sd.reserved.length; - staticDataBytesRead = bRead; - bytesRead += bRead; + posAfterStaticData = byteBuffer.position(); + } + + int bytesReadAfterStaticData() { + return byteBuffer.position() - posAfterStaticData; } protected void readMBCSHeader(CharsetMBCS.MBCSHeader h) throws IOException { byteBuffer.get(h.version); - bytesRead += h.version.length; h.countStates = byteBuffer.getInt(); - bytesRead+=4; h.countToUFallbacks = byteBuffer.getInt(); - bytesRead+=4; h.offsetToUCodeUnits = byteBuffer.getInt(); - bytesRead+=4; h.offsetFromUTable = byteBuffer.getInt(); - bytesRead+=4; h.offsetFromUBytes = byteBuffer.getInt(); - bytesRead+=4; h.flags = byteBuffer.getInt(); - bytesRead+=4; h.fromUBytesLength = byteBuffer.getInt(); - bytesRead+=4; if (h.version[0] == 5 && h.version[1] >= 3) { h.options = byteBuffer.getInt(); - bytesRead+=4; if ((h.options & CharsetMBCS.MBCS_OPT_NO_FROM_U) != 0) { h.fullStage2Length = byteBuffer.getInt(); - bytesRead+=4; } } } - - protected void readMBCSTable(int[][] stateTableArray, CharsetMBCS.MBCSToUFallback[] toUFallbacksArray, char[] unicodeCodeUnitsArray, char[] fromUnicodeTableArray, byte[] fromUnicodeBytesArray) throws IOException + + protected void readMBCSTable(MBCSHeader header, UConverterMBCSTable mbcsTable) throws IOException { - int i, j; - for(i = 0; i < stateTableArray.length; ++i){ - for(j = 0; j < stateTableArray[i].length; ++j){ - stateTableArray[i][j] = byteBuffer.getInt(); - bytesRead+=4; + IntBuffer intBuffer = byteBuffer.asIntBuffer(); + mbcsTable.countStates = (byte) header.countStates; + mbcsTable.stateTable = new int[header.countStates][256]; + int i; + for(i = 0; i < header.countStates; ++i) { + intBuffer.get(mbcsTable.stateTable[i]); + } + + mbcsTable.countToUFallbacks = header.countToUFallbacks; + mbcsTable.toUFallbacks = new MBCSToUFallback[header.countToUFallbacks]; + for(i = 0; i < header.countToUFallbacks; ++i) { + int offset = intBuffer.get(); + int codePoint = intBuffer.get(); + mbcsTable.toUFallbacks[i] = new MBCSToUFallback(offset, codePoint); + } + // Skip as many bytes as we have read from the IntBuffer. + int length = intBuffer.position() * 4; + ICUBinary.skipBytes(byteBuffer, length); + + // Consider leaving some large arrays as CharBuffer/IntBuffer rather than + // reading them into Java arrays, to reduce initialization time and memory usage, + // at the cost of some performance. + // For example: unicodeCodeUnits, fromUnicodeTable, fromUnicodeInts. + // Take care not to modify the buffer contents for swaplfnl. + CharBuffer charBuffer = byteBuffer.asCharBuffer(); + length = header.offsetFromUTable - header.offsetToUCodeUnits; + assert (length & 1) == 0; + mbcsTable.unicodeCodeUnits = new char[length / 2]; + charBuffer.get(mbcsTable.unicodeCodeUnits); + // Skip as many bytes as we have read from the CharBuffer. + ICUBinary.skipBytes(byteBuffer, length); + + length = header.offsetFromUBytes - header.offsetFromUTable; + assert (length & 1) == 0; + int fromUTableCharsLength; + if (mbcsTable.outputType == CharsetMBCS.MBCS_OUTPUT_1) { + // single-byte table stage1 + stage2 + fromUTableCharsLength = length / 2; + } else if (mbcsTable.hasSupplementary()) { + // stage1 for Unicode limit 0x110000 >> 10 + fromUTableCharsLength = 0x440; + } else { + // stage1 for BMP limit 0x10000 >> 10 + fromUTableCharsLength = 0x40; + } + mbcsTable.fromUnicodeTable = new char[fromUTableCharsLength]; + charBuffer.get(mbcsTable.fromUnicodeTable); + if (mbcsTable.outputType != CharsetMBCS.MBCS_OUTPUT_1) { + // Read both stage1 and stage2 together into an int[] array. + // Keeping the short stage1 in the array avoids offsetting at runtime. + // The stage1 part of this array will not be used. + assert (length & 3) == 0; + mbcsTable.fromUnicodeTableInts = new int[length / 4]; + byteBuffer.asIntBuffer().get(mbcsTable.fromUnicodeTableInts); + } + // Skip as many bytes as are in stage1 + stage2. + ICUBinary.skipBytes(byteBuffer, length); + + mbcsTable.fromUBytesLength = header.fromUBytesLength; + boolean noFromU = ((header.options & CharsetMBCS.MBCS_OPT_NO_FROM_U) != 0); + if (!noFromU) { + switch (mbcsTable.outputType) { + case CharsetMBCS.MBCS_OUTPUT_1: + case CharsetMBCS.MBCS_OUTPUT_2: + case CharsetMBCS.MBCS_OUTPUT_2_SISO: + case CharsetMBCS.MBCS_OUTPUT_3_EUC: + mbcsTable.fromUnicodeChars = new char[header.fromUBytesLength / 2]; + byteBuffer.asCharBuffer().get(mbcsTable.fromUnicodeChars); + ICUBinary.skipBytes(byteBuffer, header.fromUBytesLength & ~1); + break; + case CharsetMBCS.MBCS_OUTPUT_3: + case CharsetMBCS.MBCS_OUTPUT_4_EUC: + mbcsTable.fromUnicodeBytes = new byte[header.fromUBytesLength]; + byteBuffer.get(mbcsTable.fromUnicodeBytes); + break; + case CharsetMBCS.MBCS_OUTPUT_4: + mbcsTable.fromUnicodeInts = new int[header.fromUBytesLength / 4]; + byteBuffer.asIntBuffer().get(mbcsTable.fromUnicodeInts); + ICUBinary.skipBytes(byteBuffer, header.fromUBytesLength & ~3); + break; + default: + // Cannot occur, caller checked already. + assert false; } - } - for(i = 0; i < toUFallbacksArray.length; ++i) { - toUFallbacksArray[i].offset = byteBuffer.getInt(); - bytesRead+=4; - toUFallbacksArray[i].codePoint = byteBuffer.getInt(); - bytesRead+=4; - } - for(i = 0; i < unicodeCodeUnitsArray.length; ++i){ - unicodeCodeUnitsArray[i] = byteBuffer.getChar(); - bytesRead+=2; - } - for(i = 0; i < fromUnicodeTableArray.length; ++i){ - fromUnicodeTableArray[i] = byteBuffer.getChar(); - bytesRead+=2; - } - for(i = 0; i < fromUnicodeBytesArray.length; ++i){ - fromUnicodeBytesArray[i] = byteBuffer.get(); - bytesRead++; + } else { + // Optional utf8Friendly mbcsIndex -- _MBCSHeader.version 4.3 (ICU 3.8) and higher. + // Needed for reconstituting omitted data. + mbcsTable.mbcsIndex = byteBuffer.asCharBuffer(); } } @@ -527,60 +581,33 @@ final class UConverterDataReader implements ICUBinary.Authenticate { StringBuilder name = new StringBuilder(); while((c = (char)byteBuffer.get()) != 0){ name.append(c); - bytesRead++; } - bytesRead++/*for null terminator*/; return name.toString(); } //protected int[] readExtIndexes(int skip) throws IOException - protected ByteBuffer readExtIndexes(int skip) throws IOException + protected ByteBuffer readExtIndexes(int skip) throws IOException, InvalidFormatException { ICUBinary.skipBytes(byteBuffer, skip); - int n = byteBuffer.getInt(); - bytesRead+=4; - int[] indexes = new int[n]; - indexes[0] = n; - for(int i = 1; i < n; ++i) { - indexes[i] = byteBuffer.getInt(); - bytesRead+=4; + ByteBuffer b = ICUBinary.sliceWithOrder(byteBuffer); + int lengthOfIndexes = b.getInt(0); + if (lengthOfIndexes < 32) { + throw new InvalidFormatException(); } - //return indexes; - - ByteBuffer b = ByteBuffer.allocate(indexes[31]); - for(int i = 0; i < n; ++i) { - b.putInt(indexes[i]); - } - int len = b.remaining(); - byteBuffer.get(b.array(), b.position(), len); - bytesRead += len; + int numBytesExtensionStructure = b.getInt(31 * 4); + b.limit(numBytesExtensionStructure); + ICUBinary.skipBytes(byteBuffer, numBytesExtensionStructure); return b; } - /*protected byte[] readExtTables(int n) throws IOException - { - byte[] tables = new byte[n]; - int len = byteBuffer.get(tables); - if(len==-1){ - throw new IOException("Read failed"); - } - bytesRead += len; - return tables; - }*/ - - byte[] getDataFormatVersion(){ - return DATA_FORMAT_VERSION; - } /** - * Inherited method + * Data formatVersion 6.1 and higher has a unicodeMask. */ - public boolean isDataVersionAcceptable(byte version[]){ - return version[0] == DATA_FORMAT_VERSION[0]; + boolean dataFormatHasUnicodeMask() { + int formatVersion0 = byteBuffer.get(16) & 0xff; + return formatVersion0 > 6 || (formatVersion0 == 6 && byteBuffer.get(17) != 0); } - -/* byte[] getUnicodeVersion(){ - return unicodeVersion; - }*/ + // private data members ------------------------------------------------- /** @@ -597,5 +624,4 @@ final class UConverterDataReader implements ICUBinary.Authenticate { */ // DATA_FORMAT_ID_ values taken from icu4c isCnvAcceptable (ucnv_bld.c) private static final int DATA_FORMAT_ID = 0x636e7674; // dataFormat="cnvt" - private static final byte DATA_FORMAT_VERSION[] = {(byte)0x6}; } diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRoot.java b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRoot.java index 13c0071f736..185fe55bc2d 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRoot.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRoot.java @@ -12,12 +12,11 @@ package com.ibm.icu.impl.coll; import java.io.IOException; -import java.io.InputStream; +import java.nio.ByteBuffer; import java.util.MissingResourceException; import com.ibm.icu.impl.ICUBinary; import com.ibm.icu.impl.ICUData; -import com.ibm.icu.impl.ICUResourceBundle; /** * Collation root provider. @@ -42,20 +41,20 @@ public final class CollationRoot { // purely static } static { // Corresponds to C++ load() function. - CollationTailoring t = new CollationTailoring(null); - // TODO: Optionally load from a .dat file or stand-alone .icu file. - String path = ICUResourceBundle.ICU_BUNDLE + "/coll/ucadata.icu"; - InputStream is = ICUData.getRequiredStream(path); + CollationTailoring t = null; RuntimeException e2 = null; try { - CollationDataReader.read(null, ICUBinary.getByteBufferFromInputStream(is), t); + ByteBuffer bytes = ICUBinary.getRequiredData("coll/ucadata.icu"); + CollationTailoring t2 = new CollationTailoring(null); + CollationDataReader.read(null, bytes, t2); + // Keep t=null until after the root data has been read completely. + // Otherwise we would set a non-null root object if the data reader throws an exception. + t = t2; } catch(IOException e) { - t = null; e2 = new MissingResourceException( "IOException while reading CLDR root data", - "CollationRoot", path); + "CollationRoot", ICUData.ICU_BUNDLE + "/coll/ucadata.icu"); } catch(RuntimeException e) { - t = null; e2 = e; } rootSingleton = t; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/ICUConfig.properties b/icu4j/main/classes/core/src/com/ibm/icu/ICUConfig.properties index 171ebed73ea..9c487993cb1 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/ICUConfig.properties +++ b/icu4j/main/classes/core/src/com/ibm/icu/ICUConfig.properties @@ -3,7 +3,7 @@ #* Copyright (C) 2008-2014, International Business Machines Corporation and * #* others. All Rights Reserved. * #******************************************************************************* -#* This is the properties contains ICU runtime configuration +#* This is the properties file which contains ICU runtime configuration. #* # @@ -20,6 +20,7 @@ com.ibm.icu.util.TimeZone.DefaultTimeZoneType = ICU com.ibm.icu.text.MessagePattern.ApostropheMode = DOUBLE_OPTIONAL # +# [Internal Use Only] # By default, DecimalFormat uses some internal equivalent character # data in addition to ones in DecimalFormatSymbols for parsing # decimal/grouping separators. When this property is true, @@ -29,8 +30,18 @@ com.ibm.icu.text.MessagePattern.ApostropheMode = DOUBLE_OPTIONAL # @internal com.ibm.icu.text.DecimalFormat.SkipExtendedSeparatorParsing = false +# File system path where ICU looks for binary data files. +# If not empty, then ICU looks for binary data files before looking for data on the classpath. +# This string may contain multiple paths, see File.pathSeparatorChar. +# Spaces (U+0020) around each path are trimmed away. Empty paths are ignored. +# There may be individual files, for example, zoneinfo64.res, +# or ICU4C .dat package files, for example, collation.dat or icudt54l.dat. +# Each ICU data file may contain little-endian or big-endian data. +# Each ICU data file's charset must be ASCII. (Platform type 'l' or 'b' but not 'e'.) +# @draft ICU 54 +com.ibm.icu.impl.ICUBinary.dataPath = -# +# # [Internal Use Only] # Disable resource path scan for building full locale name list # at run time. diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java index b345d74f038..faec76a3d48 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java @@ -7,17 +7,262 @@ package com.ibm.icu.impl; +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.util.ArrayList; +import java.util.List; +import java.util.MissingResourceException; +import com.ibm.icu.util.ICUUncheckedIOException; import com.ibm.icu.util.VersionInfo; -public final class ICUBinary -{ +public final class ICUBinary { + /** + * Reads the ICU .dat package file format. + * Most methods do not modify the ByteBuffer in any way, + * not even its position or other state. + */ + private static final class DatPackageReader { + /** + * .dat package data format ID "CmnD". + */ + private static final int DATA_FORMAT = 0x436d6e44; + + private static final class IsAcceptable implements Authenticate { + // @Override when we switch to Java 6 + public boolean isDataVersionAcceptable(byte version[]) { + return version[0] == 1; + } + } + private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable(); + + /** + * Checks that the ByteBuffer contains a valid, usable ICU .dat package. + * Moves the buffer position from 0 to after the data header. + */ + private static boolean validate(ByteBuffer bytes) { + try { + readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE); + } catch (IOException ignored) { + return false; + } + int count = bytes.getInt(bytes.position()); // Do not move the position. + if (count <= 0) { + return false; + } + // For each item, there is one ToC entry (8 bytes) and a name string + // and a data item of at least 16 bytes. + // (We assume no data item duplicate elimination for now.) + if (bytes.position() + 4 + count * (8 + 16) > bytes.capacity()) { + return false; + } + if (!startsWithPackageName(bytes, getNameOffset(bytes, 0)) || + !startsWithPackageName(bytes, getNameOffset(bytes, count - 1))) { + return false; + } + return true; + } + + private static boolean startsWithPackageName(ByteBuffer bytes, int start) { + // Compare all but the trailing 'b' or 'l' which depends on the platform. + int length = ICUData.PACKAGE_NAME.length() - 1; + for (int i = 0; i < length; ++i) { + if (bytes.get(start + i) != ICUData.PACKAGE_NAME.charAt(i)) { + return false; + } + } + // Check for 'b' or 'l' followed by '/'. + byte c = bytes.get(start + length++); + if ((c != 'b' && c != 'l') || bytes.get(start + length) != '/') { + return false; + } + return true; + } + + private static ByteBuffer getData(ByteBuffer bytes, CharSequence key) { + int base = bytes.position(); + int count = bytes.getInt(base); + + // Do a binary search for the key. + int start = 0; + int limit = count; + while (start < limit) { + int mid = (start + limit) >>> 1; + int nameOffset = getNameOffset(bytes, mid); + // Skip "icudt54b/". + nameOffset += ICUData.PACKAGE_NAME.length() + 1; + int result = compareKeys(key, bytes, nameOffset); + if (result < 0) { + limit = mid; + } else if (result > 0) { + start = mid + 1; + } else { + // We found it! + ByteBuffer data = bytes.duplicate(); + data.position(getDataOffset(bytes, mid)); + data.limit(getDataOffset(bytes, mid + 1)); + return ICUBinary.sliceWithOrder(data); + } + } + return null; // Not found or table is empty. + } + + private static int getNameOffset(ByteBuffer bytes, int index) { + int base = bytes.position(); + assert 0 <= index && index < bytes.getInt(base); // count + // The count integer (4 bytes) + // is followed by count (nameOffset, dataOffset) integer pairs (8 bytes per pair). + return base + bytes.getInt(base + 4 + index * 8); + } + + private static int getDataOffset(ByteBuffer bytes, int index) { + int base = bytes.position(); + int count = bytes.getInt(base); + if (index == count) { + // Return the limit of the last data item. + return bytes.capacity(); + } + assert 0 <= index && index < count; + // The count integer (4 bytes) + // is followed by count (nameOffset, dataOffset) integer pairs (8 bytes per pair). + // The dataOffset follows the nameOffset (skip another 4 bytes). + return base + bytes.getInt(base + 4 + 4 + index * 8); + } + } + + private static final class DataFile { + public final String itemPath; + /** + * null if a .dat package. + */ + public final File path; + /** + * .dat package bytes, or null if not a .dat package. + * position() is after the header. + * Do not modify the position or other state, for thread safety. + */ + public final ByteBuffer pkgBytes; + + public DataFile(String item, File path) { + itemPath = item; + this.path = path; + pkgBytes = null; + } + public DataFile(String item, ByteBuffer bytes) { + itemPath = item; + path = null; + pkgBytes = bytes; + } + public String toString() { + return path.toString(); + } + } + private static final List icuDataFiles = new ArrayList(); + + static { + // Normally com.ibm.icu.impl.ICUBinary.dataPath. + String dataPath = ICUConfig.get(ICUBinary.class.getName() + ".dataPath"); + if (dataPath != null) { + addDataFilesFromPath(dataPath, icuDataFiles); + } + } + + private static void addDataFilesFromPath(String dataPath, List files) { + // Split the path and find files in each location. + // This splitting code avoids the regex pattern compilation in String.split() + // and its array allocation. + // (There is no simple by-character split() + // and the StringTokenizer "is discouraged in new code".) + int pathStart = 0; + while (pathStart < dataPath.length()) { + int sepIndex = dataPath.indexOf(File.pathSeparatorChar, pathStart); + int pathLimit; + if (sepIndex >= 0) { + pathLimit = sepIndex; + } else { + pathLimit = dataPath.length(); + } + String path = dataPath.substring(pathStart, pathLimit).trim(); + if (path.endsWith(File.separator)) { + path = path.substring(0, path.length() - 1); + } + if (path.length() != 0) { + addDataFilesFromFolder(new File(path), new StringBuilder(), icuDataFiles); + } + if (sepIndex < 0) { + break; + } + pathStart = sepIndex + 1; + } + } + + private static void addDataFilesFromFolder(File folder, StringBuilder itemPath, + List dataFiles) { + File[] files = folder.listFiles(); + if (files == null || files.length == 0) { + return; + } + int folderPathLength = itemPath.length(); + if (folderPathLength > 0) { + // The item path must use the ICU file separator character, + // not the platform-dependent File.separatorChar, + // so that the enumerated item paths match the paths requested by ICU code. + itemPath.append('/'); + ++folderPathLength; + } + for (File file : files) { + String fileName = file.getName(); + if (fileName.endsWith(".txt")) { + continue; + } + itemPath.append(fileName); + if (file.isDirectory()) { + // TODO: Within a folder, put all single files before all .dat packages? + addDataFilesFromFolder(file, itemPath, dataFiles); + } else if (fileName.endsWith(".dat")) { + ByteBuffer pkgBytes = mapFile(file); + if (pkgBytes != null && DatPackageReader.validate(pkgBytes)) { + dataFiles.add(new DataFile(itemPath.toString(), pkgBytes)); + } + } else { + dataFiles.add(new DataFile(itemPath.toString(), file)); + } + itemPath.setLength(folderPathLength); + } + } + + /** + * Compares the length-specified input key with the + * NUL-terminated table key. (ASCII) + */ + static int compareKeys(CharSequence key, ByteBuffer bytes, int offset) { + for (int i = 0;; ++i, ++offset) { + int c2 = bytes.get(offset); + if (c2 == 0) { + if (i == key.length()) { + return 0; + } else { + return 1; // key > table key because key is longer. + } + } else if (i == key.length()) { + return -1; // key < table key because key is shorter. + } + int diff = (int)key.charAt(i) - c2; + if (diff != 0) { + return diff; + } + } + } + // public inner interface ------------------------------------------------ - + /** * Special interface for data authentication */ @@ -34,10 +279,131 @@ public final class ICUBinary // public methods -------------------------------------------------------- + /** + * Loads an ICU binary data file and returns it as a ByteBuffer. + * The buffer contents is normally read-only, but its position etc. can be modified. + * + * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu". + * @return The data as a read-only ByteBuffer, + * or null if the resource could not be found. + */ + public static ByteBuffer getData(String itemPath) { + return getData(null, null, itemPath, false); + } + + /** + * Loads an ICU binary data file and returns it as a ByteBuffer. + * The buffer contents is normally read-only, but its position etc. can be modified. + * + * @param loader Used for loader.getResourceAsStream() unless the data is found elsewhere. + * @param resourceName Resource name for use with the loader. + * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu". + * @return The data as a read-only ByteBuffer, + * or null if the resource could not be found. + */ + public static ByteBuffer getData(ClassLoader loader, String resourceName, String itemPath) { + return getData(loader, resourceName, itemPath, false); + } + + /** + * Loads an ICU binary data file and returns it as a ByteBuffer. + * The buffer contents is normally read-only, but its position etc. can be modified. + * + * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu". + * @return The data as a read-only ByteBuffer. + * @throws MissingResourceException if required==true and the resource could not be found + */ + public static ByteBuffer getRequiredData(String itemPath) { + return getData(null, null, itemPath, true); + } + + /** + * Loads an ICU binary data file and returns it as a ByteBuffer. + * The buffer contents is normally read-only, but its position etc. can be modified. + * + * @param loader Used for loader.getResourceAsStream() unless the data is found elsewhere. + * @param resourceName Resource name for use with the loader. + * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu". + * @return The data as a read-only ByteBuffer. + * @throws MissingResourceException if required==true and the resource could not be found + */ +// public static ByteBuffer getRequiredData(ClassLoader loader, String resourceName, +// String itemPath) { +// return getData(loader, resourceName, itemPath, true); +// } + + /** + * Loads an ICU binary data file and returns it as a ByteBuffer. + * The buffer contents is normally read-only, but its position etc. can be modified. + * + * @param loader Used for loader.getResourceAsStream() unless the data is found elsewhere. + * @param resourceName Resource name for use with the loader. + * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu". + * @param required If the resource cannot be found, + * this method returns null (!required) or throws an exception (required). + * @return The data as a read-only ByteBuffer, + * or null if required==false and the resource could not be found. + * @throws MissingResourceException if required==true and the resource could not be found + */ + private static ByteBuffer getData(ClassLoader loader, String resourceName, + String itemPath, boolean required) { + ByteBuffer bytes = getDataFromFile(itemPath); + if (bytes != null) { + return bytes; + } + if (loader == null) { + loader = ICUData.class.getClassLoader(); + } + if (resourceName == null) { + resourceName = ICUData.ICU_BASE_NAME + '/' + itemPath; + } + InputStream is = ICUData.getStream(loader, resourceName, required); + if (is == null) { + return null; + } + try { + return getByteBufferFromInputStream(is); + } catch (IOException e) { + throw new ICUUncheckedIOException(e); + } + } + + private static ByteBuffer getDataFromFile(String itemPath) { + for (DataFile dataFile : icuDataFiles) { + if (dataFile.pkgBytes != null) { + ByteBuffer data = DatPackageReader.getData(dataFile.pkgBytes, itemPath); + if (data != null) { + return data; + } + } else if (itemPath.equals(dataFile.itemPath)) { + return mapFile(dataFile.path); + } + } + return null; + } + + private static ByteBuffer mapFile(File path) { + FileInputStream file; + try { + file = new FileInputStream(path); + FileChannel channel = file.getChannel(); + ByteBuffer bytes = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size()); + // Close the file and its channel; this seems to keep the ByteBuffer valid. + // If not, then we will need to return the pair of (file, bytes). + file.close(); + return bytes; + } catch(FileNotFoundException ignored) { + System.err.println(ignored); + } catch (IOException ignored) { + System.err.println(ignored); + } + return null; + } + /** * Same as readHeader(), but returns a VersionInfo rather than a compact int. */ - public static final VersionInfo readHeaderAndDataVersion(ByteBuffer bytes, + public static VersionInfo readHeaderAndDataVersion(ByteBuffer bytes, int dataFormat, Authenticate authenticate) throws IOException { @@ -56,7 +422,7 @@ public final class ICUBinary * @return dataVersion * @throws IOException if this is not a valid ICU data item of the expected dataFormat */ - public static final int readHeader(ByteBuffer bytes, int dataFormat, Authenticate authenticate) + public static int readHeader(ByteBuffer bytes, int dataFormat, Authenticate authenticate) throws IOException { assert bytes.position() == 0; byte magic1 = bytes.get(2); @@ -89,7 +455,11 @@ public final class ICUBinary bytes.get(14) != (byte)(dataFormat >> 8) || bytes.get(15) != (byte)dataFormat || (authenticate != null && !authenticate.isDataVersionAcceptable(formatVersion))) { - throw new IOException(HEADER_AUTHENTICATION_FAILED_); + throw new IOException(HEADER_AUTHENTICATION_FAILED_ + + String.format("; data format %02x%02x%02x%02x, format version %d.%d.%d.%d", + bytes.get(12), bytes.get(13), bytes.get(14), bytes.get(15), + formatVersion[0] & 0xff, formatVersion[1] & 0xff, + formatVersion[2] & 0xff, formatVersion[3] & 0xff)); } bytes.position(headerSize); @@ -100,17 +470,54 @@ public final class ICUBinary (bytes.get(23) & 0xff); } - public static final void skipBytes(ByteBuffer bytes, int skipLength) { + /** + * Writes an ICU data header. + * Does not write a copyright string. + * + * @return The length of the header (number of bytes written). + * @throws IOException from the DataOutputStream + */ + public static int writeHeader(int dataFormat, int formatVersion, int dataVersion, + DataOutputStream dos) throws IOException { + // ucmndata.h MappedData + dos.writeChar(32); // headerSize + dos.writeByte(MAGIC1); + dos.writeByte(MAGIC2); + // unicode/udata.h UDataInfo + dos.writeChar(20); // sizeof(UDataInfo) + dos.writeChar(0); // reservedWord + dos.writeByte(1); // isBigEndian + dos.writeByte(CHAR_SET_); // charsetFamily + dos.writeByte(CHAR_SIZE_); // sizeofUChar + dos.writeByte(0); // reservedByte + dos.writeInt(dataFormat); + dos.writeInt(formatVersion); + dos.writeInt(dataVersion); + // 8 bytes padding for 32 bytes headerSize (multiple of 16). + dos.writeLong(0); + assert dos.size() == 32; + return 32; + } + + public static void skipBytes(ByteBuffer bytes, int skipLength) { if (skipLength > 0) { bytes.position(bytes.position() + skipLength); } } + /** + * Same as ByteBuffer.slice() plus preserving the byte order. + */ + public static ByteBuffer sliceWithOrder(ByteBuffer bytes) { + ByteBuffer b = bytes.slice(); + return b.order(bytes.order()); + } + /** * Reads the entire contents from the stream into a byte array * and wraps it into a ByteBuffer. Closes the InputStream at the end. */ - public static final ByteBuffer getByteBufferFromInputStream(InputStream is) throws IOException { + public static ByteBuffer getByteBufferFromInputStream(InputStream is) throws IOException { try { int avail = is.available(); byte[] bytes = new byte[avail]; @@ -128,7 +535,7 @@ public final class ICUBinary } } - private static final void readFully(InputStream is, byte[] bytes, int offset, int avail) + private static void readFully(InputStream is, byte[] bytes, int offset, int avail) throws IOException { while (avail > 0) { int numRead = is.read(bytes, offset, avail); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUData.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUData.java index b47b278db85..b5fa3e14acf 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUData.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUData.java @@ -1,7 +1,7 @@ /* ******************************************************************************* - * Copyright (C) 2004-2009, International Business Machines Corporation and * - * others. All Rights Reserved. * + * Copyright (C) 2004-2014, International Business Machines Corporation and + * others. All Rights Reserved. ******************************************************************************* * * Created on Feb 4, 2004 @@ -9,22 +9,83 @@ */ package com.ibm.icu.impl; +import java.io.IOException; import java.io.InputStream; import java.net.URL; import java.security.AccessController; import java.security.PrivilegedAction; import java.util.MissingResourceException; +import java.util.logging.Logger; + +import com.ibm.icu.util.VersionInfo; /** * Provides access to ICU data files as InputStreams. Implements security checking. */ public final class ICUData { - /* - * Return a URL to the ICU resource names resourceName. The - * resource name should either be an absolute path, or a path relative to - * com.ibm.icu.impl (e.g., most likely it is 'data/foo'). If required - * is true, throw an MissingResourceException instead of returning a null result. + /** + * The data path to be used with getBundleInstance API */ + static final String ICU_DATA_PATH = "com/ibm/icu/impl/"; + /** + * The ICU data package name. + * This is normally the name of the .dat package, and the prefix (plus '/') + * of the package entry names. + */ + static final String PACKAGE_NAME = "icudt" + VersionInfo.ICU_DATA_VERSION_PATH; + /** + * The data path to be used with Class.getResourceAsStream(). + */ + public static final String ICU_BUNDLE = "data/" + PACKAGE_NAME; + + /** + * The base name of ICU data to be used with ClassLoader.getResourceAsStream(), + * ICUResourceBundle.getBundleInstance() etc. + */ + public static final String ICU_BASE_NAME = ICU_DATA_PATH + ICU_BUNDLE; + + /** + * The base name of collation data to be used with getBundleInstance API + */ + public static final String ICU_COLLATION_BASE_NAME = ICU_BASE_NAME + "/coll"; + + /** + * The base name of rbbi data to be used with getData API + */ + public static final String ICU_BRKITR_NAME = "brkitr"; + + /** + * The base name of rbbi data to be used with getBundleInstance API + */ + public static final String ICU_BRKITR_BASE_NAME = ICU_BASE_NAME + '/' + ICU_BRKITR_NAME; + + /** + * The base name of rbnf data to be used with getBundleInstance API + */ + public static final String ICU_RBNF_BASE_NAME = ICU_BASE_NAME + "/rbnf"; + + /** + * The base name of transliterator data to be used with getBundleInstance API + */ + public static final String ICU_TRANSLIT_BASE_NAME = ICU_BASE_NAME + "/translit"; + + public static final String ICU_LANG_BASE_NAME = ICU_BASE_NAME + "/lang"; + public static final String ICU_CURR_BASE_NAME = ICU_BASE_NAME + "/curr"; + public static final String ICU_REGION_BASE_NAME = ICU_BASE_NAME + "/region"; + public static final String ICU_ZONE_BASE_NAME = ICU_BASE_NAME + "/zone"; + + /** + * For testing (otherwise false): When reading an InputStream from a Class or ClassLoader + * (that is, not from a file), log when the stream contains ICU binary data. + * + * This cannot be ICUConfig'ured because ICUConfig calls ICUData.getStream() + * to read the properties file, so we would get a circular dependency + * in the class initialization. + */ + private static final boolean logBinaryDataFromInputStream = false; + private static final Logger logger = logBinaryDataFromInputStream ? + Logger.getLogger(ICUData.class.getName()) : null; + public static boolean exists(final String resourceName) { URL i = null; if (System.getSecurityManager() != null) { @@ -38,10 +99,9 @@ public final class ICUData { } return i != null; } - + private static InputStream getStream(final Class root, final String resourceName, boolean required) { InputStream i = null; - if (System.getSecurityManager() != null) { i = AccessController.doPrivileged(new PrivilegedAction() { public InputStream run() { @@ -55,10 +115,14 @@ public final class ICUData { if (i == null && required) { throw new MissingResourceException("could not locate data " +resourceName, root.getPackage().getName(), resourceName); } + checkStreamForBinaryData(i, resourceName); return i; } - private static InputStream getStream(final ClassLoader loader, final String resourceName, boolean required) { + /** + * Should be called only from ICUBinary.getData() or from convenience overloads here. + */ + static InputStream getStream(final ClassLoader loader, final String resourceName, boolean required) { InputStream i = null; if (System.getSecurityManager() != null) { i = AccessController.doPrivileged(new PrivilegedAction() { @@ -72,40 +136,67 @@ public final class ICUData { if (i == null && required) { throw new MissingResourceException("could not locate data", loader.toString(), resourceName); } + checkStreamForBinaryData(i, resourceName); return i; } - + + @SuppressWarnings("unused") // used if logBinaryDataFromInputStream == true + private static void checkStreamForBinaryData(InputStream is, String resourceName) { + if (logBinaryDataFromInputStream && is != null && resourceName.indexOf(PACKAGE_NAME) >= 0) { + try { + is.mark(32); + byte[] b = new byte[32]; + int len = is.read(b); + if (len == 32 && b[2] == (byte)0xda && b[3] == 0x27) { + String msg = String.format( + "ICU binary data file loaded from Class/ClassLoader as InputStream " + + "from %s: MappedData %02x%02x%02x%02x dataFormat %02x%02x%02x%02x", + resourceName, + b[0], b[1], b[2], b[3], + b[12], b[13], b[14], b[15]); + logger.info(msg); + } + is.reset(); + } catch (IOException ignored) { + } + } + } + public static InputStream getStream(ClassLoader loader, String resourceName){ - return getStream(loader,resourceName, false); + return getStream(loader,resourceName, false); } public static InputStream getRequiredStream(ClassLoader loader, String resourceName){ return getStream(loader, resourceName, true); } - /* + /** * Convenience override that calls getStream(ICUData.class, resourceName, false); + * Returns null if the resource could not be found. */ public static InputStream getStream(String resourceName) { return getStream(ICUData.class, resourceName, false); } - - /* + + /** * Convenience method that calls getStream(ICUData.class, resourceName, true). + * @throws MissingResourceException if the resource could not be found */ public static InputStream getRequiredStream(String resourceName) { return getStream(ICUData.class, resourceName, true); } - /* + /** * Convenience override that calls getStream(root, resourceName, false); + * Returns null if the resource could not be found. */ public static InputStream getStream(Class root, String resourceName) { return getStream(root, resourceName, false); } - - /* + + /** * Convenience method that calls getStream(root, resourceName, true). + * @throws MissingResourceException if the resource could not be found */ public static InputStream getRequiredStream(Class root, String resourceName) { return getStream(root, resourceName, true); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundle.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundle.java index 63394d76070..3f6fb14970a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundle.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundle.java @@ -29,52 +29,76 @@ import com.ibm.icu.util.ULocale; import com.ibm.icu.util.UResourceBundle; import com.ibm.icu.util.UResourceBundleIterator; import com.ibm.icu.util.UResourceTypeMismatchException; -import com.ibm.icu.util.VersionInfo; public class ICUResourceBundle extends UResourceBundle { /** * The data path to be used with getBundleInstance API + * @deprecated because not specific to resource bundles; use the ICUData constants instead */ - protected static final String ICU_DATA_PATH = "com/ibm/icu/impl/"; + @Deprecated + protected static final String ICU_DATA_PATH = ICUData.ICU_DATA_PATH; /** * The data path to be used with getBundleInstance API + * @deprecated because not specific to resource bundles; use the ICUData constants instead */ - public static final String ICU_BUNDLE = "data/icudt" + VersionInfo.ICU_DATA_VERSION_PATH; + @Deprecated + public static final String ICU_BUNDLE = ICUData.ICU_BUNDLE; /** * The base name of ICU data to be used with getBundleInstance API + * @deprecated because not specific to resource bundles; use the ICUData constants instead */ - public static final String ICU_BASE_NAME = ICU_DATA_PATH + ICU_BUNDLE; + @Deprecated + public static final String ICU_BASE_NAME = ICUData.ICU_BASE_NAME; /** * The base name of collation data to be used with getBundleInstance API + * @deprecated because not specific to resource bundles; use the ICUData constants instead */ - public static final String ICU_COLLATION_BASE_NAME = ICU_BASE_NAME + "/coll"; - - /** - * The base name of rbbi data to be used with getData API - */ - public static final String ICU_BRKITR_NAME = "/brkitr"; + @Deprecated + public static final String ICU_COLLATION_BASE_NAME = ICUData.ICU_COLLATION_BASE_NAME; /** * The base name of rbbi data to be used with getBundleInstance API + * @deprecated because not specific to resource bundles; use the ICUData constants instead */ - public static final String ICU_BRKITR_BASE_NAME = ICU_BASE_NAME + ICU_BRKITR_NAME; + @Deprecated + public static final String ICU_BRKITR_BASE_NAME = ICUData.ICU_BRKITR_BASE_NAME; /** * The base name of rbnf data to be used with getBundleInstance API + * @deprecated because not specific to resource bundles; use the ICUData constants instead */ - public static final String ICU_RBNF_BASE_NAME = ICU_BASE_NAME + "/rbnf"; + @Deprecated + public static final String ICU_RBNF_BASE_NAME = ICUData.ICU_RBNF_BASE_NAME; /** * The base name of transliterator data to be used with getBundleInstance API + * @deprecated because not specific to resource bundles; use the ICUData constants instead */ - public static final String ICU_TRANSLIT_BASE_NAME = ICU_BASE_NAME + "/translit"; + @Deprecated + public static final String ICU_TRANSLIT_BASE_NAME = ICUData.ICU_TRANSLIT_BASE_NAME; - public static final String ICU_LANG_BASE_NAME = ICU_BASE_NAME + "/lang"; - public static final String ICU_CURR_BASE_NAME = ICU_BASE_NAME + "/curr"; - public static final String ICU_REGION_BASE_NAME = ICU_BASE_NAME + "/region"; - public static final String ICU_ZONE_BASE_NAME = ICU_BASE_NAME + "/zone"; + /** + * @deprecated because not specific to resource bundles; use the ICUData constants instead + */ + @Deprecated + public static final String ICU_LANG_BASE_NAME = ICUData.ICU_LANG_BASE_NAME; + /** + * @deprecated because not specific to resource bundles; use the ICUData constants instead + */ + @Deprecated + public static final String ICU_CURR_BASE_NAME = ICUData.ICU_CURR_BASE_NAME; + /** + * @deprecated because not specific to resource bundles; use the ICUData constants instead + */ + @Deprecated + public static final String ICU_REGION_BASE_NAME = ICUData.ICU_REGION_BASE_NAME; + /** + * @deprecated because not specific to resource bundles; use the ICUData constants instead + */ + @Deprecated + public static final String ICU_ZONE_BASE_NAME = ICUData.ICU_ZONE_BASE_NAME; private static final String NO_INHERITANCE_MARKER = "\u2205\u2205\u2205"; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java index 6c921db8862..1f3a075a283 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java @@ -328,11 +328,25 @@ public final class ICUResourceBundleReader { @Override protected ICUResourceBundleReader createInstance(ReaderInfo key, ReaderInfo data) { String fullName = ICUResourceBundleReader.getFullName(data.baseName, data.localeID); - InputStream stream = ICUData.getStream(data.loader, fullName); - if (stream == null) { - return NULL_READER; + try { + ByteBuffer inBytes; + if (data.baseName != null && data.baseName.startsWith(ICUData.ICU_BASE_NAME)) { + String itemPath = fullName.substring(ICUData.ICU_BASE_NAME.length() + 1); + inBytes = ICUBinary.getData(data.loader, fullName, itemPath); + if (inBytes == null) { + return NULL_READER; + } + } else { + InputStream stream = ICUData.getStream(data.loader, fullName); + if (stream == null) { + return NULL_READER; + } + inBytes = ICUBinary.getByteBufferFromInputStream(stream); + } + return new ICUResourceBundleReader(inBytes, data.baseName, data.localeID, data.loader); + } catch (IOException ex) { + throw new ICUUncheckedIOException("Data file " + fullName + " is corrupt - " + ex.getMessage(), ex); } - return new ICUResourceBundleReader(stream, data.baseName, data.localeID, data.loader); } } @@ -342,14 +356,10 @@ public final class ICUResourceBundleReader { private ICUResourceBundleReader() { } - private ICUResourceBundleReader(InputStream stream, String baseName, String localeID, ClassLoader loader) { - try { - ByteBuffer inBytes = ICUBinary.getByteBufferFromInputStream(stream); - init(inBytes); - } catch (IOException ex) { - String fullName = ICUResourceBundleReader.getFullName(baseName, localeID); - throw new ICUUncheckedIOException("Data file " + fullName + " is corrupt - " + ex.getMessage(), ex); - } + private ICUResourceBundleReader(ByteBuffer inBytes, + String baseName, String localeID, + ClassLoader loader) throws IOException { + init(inBytes); // set pool bundle keys if necessary if (usesPoolBundle) { @@ -377,7 +387,7 @@ public final class ICUResourceBundleReader { private void init(ByteBuffer inBytes) throws IOException { dataVersion = ICUBinary.readHeader(inBytes, DATA_FORMAT, IS_ACCEPTABLE); boolean isFormatVersion10 = inBytes.get(16) == 1 && inBytes.get(17) == 0; - bytes = inBytes.slice(); + bytes = ICUBinary.sliceWithOrder(inBytes); int dataLength = bytes.remaining(); if(DEBUG) System.out.println("The ByteBuffer is direct (memory-mapped): " + bytes.isDirect()); @@ -420,7 +430,7 @@ public final class ICUResourceBundleReader { if(_16BitTop > keysTop) { int num16BitUnits = (_16BitTop - keysTop) * 2; bytes.position(keysTop << 2); - b16BitUnits = bytes.slice().asCharBuffer(); + b16BitUnits = bytes.asCharBuffer(); b16BitUnits.limit(num16BitUnits); maxOffset |= num16BitUnits - 1; } else { @@ -444,7 +454,7 @@ public final class ICUResourceBundleReader { // unlike regular bundles' key strings for which indexes // are based on the start of the bundle data. bytes.position((1 + indexLength) << 2); - bytes = bytes.slice(); + bytes = ICUBinary.sliceWithOrder(bytes); } else { localKeyLimit = getIndexesInt(URES_INDEX_KEYS_TOP) << 2; } @@ -582,38 +592,18 @@ public final class ICUResourceBundleReader { return makeKeyStringFromBytes(poolBundleKeys, keyOffset & 0x7fffffff); } } - // Compare the length-specified input key with the - // NUL-terminated table key. - private static int compareKeys(CharSequence key, ByteBuffer keyBytes, int keyOffset) { - for(int i = 0;; ++i, ++keyOffset) { - int c2 = keyBytes.get(keyOffset); - if(c2 == 0) { - if(i == key.length()) { - return 0; - } else { - return 1; // key > table key because key is longer. - } - } else if(i == key.length()) { - return -1; // key < table key because key is shorter. - } - int diff = (int)key.charAt(i) - c2; - if(diff != 0) { - return diff; - } - } - } private int compareKeys(CharSequence key, char keyOffset) { if(keyOffset < localKeyLimit) { - return compareKeys(key, bytes, keyOffset); + return ICUBinary.compareKeys(key, bytes, keyOffset); } else { - return compareKeys(key, poolBundleKeys, keyOffset - localKeyLimit); + return ICUBinary.compareKeys(key, poolBundleKeys, keyOffset - localKeyLimit); } } private int compareKeys32(CharSequence key, int keyOffset) { if(keyOffset >= 0) { - return compareKeys(key, bytes, keyOffset); + return ICUBinary.compareKeys(key, bytes, keyOffset); } else { - return compareKeys(key, poolBundleKeys, keyOffset & 0x7fffffff); + return ICUBinary.compareKeys(key, poolBundleKeys, keyOffset & 0x7fffffff); } } @@ -743,7 +733,7 @@ public final class ICUResourceBundleReader { offset += 4; ByteBuffer result = bytes.duplicate(); result.position(offset).limit(offset + length); - result = result.slice(); + result = ICUBinary.sliceWithOrder(result); if(!result.isReadOnly()) { result = result.asReadOnlyBuffer(); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java index 4a69d36d25e..cc3ee5203f5 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java @@ -345,7 +345,7 @@ public final class Norm2AllModes { protected Norm2AllModes createInstance(String key, ByteBuffer bytes) { Normalizer2Impl impl; if(bytes==null) { - impl=new Normalizer2Impl().load(ICUResourceBundle.ICU_BUNDLE+"/"+key+".nrm"); + impl=new Normalizer2Impl().load(key+".nrm"); } else { impl=new Normalizer2Impl().load(bytes); } @@ -365,8 +365,7 @@ public final class Norm2AllModes { private static final class Norm2AllModesSingleton { private Norm2AllModesSingleton(String name) { try { - Normalizer2Impl impl=new Normalizer2Impl().load( - ICUResourceBundle.ICU_BUNDLE+"/"+name+".nrm"); + Normalizer2Impl impl=new Normalizer2Impl().load(name+".nrm"); allModes=new Norm2AllModes(impl); } catch(RuntimeException e) { exception=e; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java index 33f07000729..0397ca199f0 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java @@ -494,11 +494,7 @@ public final class Normalizer2Impl { } } public Normalizer2Impl load(String name) { - try { - return load(ICUBinary.getByteBufferFromInputStream(ICUData.getRequiredStream(name))); - } catch(IOException e) { - throw new ICUUncheckedIOException(e); - } + return load(ICUBinary.getRequiredData(name)); } private void enumLcccRange(int start, int end, int norm16, UnicodeSet set) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java index 495fe3ff581..62d6f9f9808 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java @@ -98,10 +98,12 @@ public abstract class Trie2 implements Iterable { header.signature = bytes.getInt(); switch (header.signature) { case 0x54726932: - bytes.order(ByteOrder.BIG_ENDIAN); + // The buffer is already set to the trie data byte order. break; case 0x32697254: - bytes.order(ByteOrder.LITTLE_ENDIAN); + // Temporarily reverse the byte order. + boolean isBigEndian = outerByteOrder == ByteOrder.BIG_ENDIAN; + bytes.order(isBigEndian ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN); header.signature = 0x54726932; break; default: diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UBiDiProps.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UBiDiProps.java index f4bf1d7e33b..c382182c9c2 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UBiDiProps.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UBiDiProps.java @@ -20,7 +20,6 @@ package com.ibm.icu.impl; import java.io.IOException; -import java.io.InputStream; import java.nio.ByteBuffer; import java.util.Iterator; @@ -34,8 +33,7 @@ public final class UBiDiProps { // port of ubidi_openProps() private UBiDiProps() throws IOException{ - InputStream is=ICUData.getStream(ICUResourceBundle.ICU_BUNDLE+"/"+DATA_FILE_NAME); - ByteBuffer bytes=ICUBinary.getByteBufferFromInputStream(is); + ByteBuffer bytes=ICUBinary.getData(DATA_FILE_NAME); readData(bytes); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java index c45f5ae7721..d3920a8fecd 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java @@ -20,7 +20,6 @@ package com.ibm.icu.impl; import java.io.IOException; -import java.io.InputStream; import java.nio.ByteBuffer; import java.util.Iterator; @@ -37,8 +36,7 @@ public final class UCaseProps { // port of ucase_openProps() private UCaseProps() throws IOException { - InputStream is=ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/"+DATA_FILE_NAME); - ByteBuffer bytes=ICUBinary.getByteBufferFromInputStream(is); + ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME); readData(bytes); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterName.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterName.java index 1d2dbfa593b..053d1b77fcc 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterName.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterName.java @@ -8,7 +8,6 @@ package com.ibm.icu.impl; import java.io.IOException; -import java.io.InputStream; import java.nio.ByteBuffer; import java.util.Locale; import java.util.MissingResourceException; @@ -1039,7 +1038,7 @@ public final class UCharacterName /** * Default name of the name datafile */ - private static final String NAME_FILE_NAME_ = ICUResourceBundle.ICU_BUNDLE+"/unames.icu"; + private static final String FILE_NAME_ = "unames.icu"; /** * Shift count to retrieve group information */ @@ -1168,8 +1167,7 @@ public final class UCharacterName */ private UCharacterName() throws IOException { - InputStream is = ICUData.getRequiredStream(NAME_FILE_NAME_); - ByteBuffer b = ICUBinary.getByteBufferFromInputStream(is); + ByteBuffer b = ICUBinary.getRequiredData(FILE_NAME_); UCharacterNameReader reader = new UCharacterNameReader(b); reader.read(this); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java index 599dc38e03e..010682564f5 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java @@ -8,7 +8,6 @@ package com.ibm.icu.impl; import java.io.IOException; -import java.io.InputStream; import java.nio.ByteBuffer; import java.util.Iterator; import java.util.MissingResourceException; @@ -970,7 +969,7 @@ public final class UCharacterProperty /** * Default name of the datafile */ - private static final String DATA_FILE_NAME_ = ICUResourceBundle.ICU_BUNDLE+"/uprops.icu"; + private static final String DATA_FILE_NAME_ = "uprops.icu"; /** * Shift value for lead surrogate to form a supplementary character. @@ -1184,8 +1183,7 @@ public final class UCharacterProperty } // jar access - InputStream is = ICUData.getRequiredStream(DATA_FILE_NAME_); - ByteBuffer bytes=ICUBinary.getByteBufferFromInputStream(is); + ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME_); m_unicodeVersion_ = ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, new IsAcceptable()); // Read or skip the 16 indexes. int propertyOffset = bytes.getInt(); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UPropertyAliases.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UPropertyAliases.java index 23eee022464..600eb205d84 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UPropertyAliases.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UPropertyAliases.java @@ -13,7 +13,6 @@ package com.ibm.icu.impl; import java.io.IOException; -import java.io.InputStream; import java.nio.ByteBuffer; import java.util.MissingResourceException; @@ -116,8 +115,7 @@ public final class UPropertyAliases { } private UPropertyAliases() throws IOException { - InputStream stream = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/pnames.icu"); - ByteBuffer bytes = ICUBinary.getByteBufferFromInputStream(stream); + ByteBuffer bytes = ICUBinary.getRequiredData("pnames.icu"); load(bytes); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/impl/ResourceBasedPeriodFormatterDataService.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/impl/ResourceBasedPeriodFormatterDataService.java index 73cb79a7eb1..5f4ee27da96 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/impl/ResourceBasedPeriodFormatterDataService.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/duration/impl/ResourceBasedPeriodFormatterDataService.java @@ -1,7 +1,7 @@ /* ****************************************************************************** - * Copyright (C) 2007-2011, International Business Machines Corporation and * - * others. All Rights Reserved. * + * Copyright (C) 2007-2014, International Business Machines Corporation and + * others. All Rights Reserved. ****************************************************************************** */ @@ -103,26 +103,21 @@ public class ResourceBasedPeriodFormatterDataService extends if (ln != null) { String name = PATH + "pfd_" + ln + ".xml"; try { - InputStream is = ICUData.getStream(getClass(), name); - if (is == null) { - throw new MissingResourceException( - "no resource named " + name, name, ""); - } else { - DataRecord dr = DataRecord.read(ln, - new XMLRecordReader(new InputStreamReader( - is, "UTF-8"))); - if (dr != null) { - // debug - // if (false && ln.equals("ar_EG")) { - // OutputStreamWriter osw = new - // OutputStreamWriter(System.out, "UTF-8"); - // XMLRecordWriter xrw = new - // XMLRecordWriter(osw); - // dr.write(xrw); - // osw.flush(); - // } - ld = new PeriodFormatterData(localeName, dr); - } + InputStream is = ICUData.getRequiredStream(getClass(), name); + DataRecord dr = DataRecord.read(ln, + new XMLRecordReader(new InputStreamReader( + is, "UTF-8"))); + if (dr != null) { + // debug + // if (false && ln.equals("ar_EG")) { + // OutputStreamWriter osw = new + // OutputStreamWriter(System.out, "UTF-8"); + // XMLRecordWriter xrw = new + // XMLRecordWriter(osw); + // dr.write(xrw); + // osw.flush(); + // } + ld = new PeriodFormatterData(localeName, dr); } } catch (UnsupportedEncodingException e) { throw new MissingResourceException( diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/BreakIteratorFactory.java b/icu4j/main/classes/core/src/com/ibm/icu/text/BreakIteratorFactory.java index 13c3428b958..00dafd56577 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/BreakIteratorFactory.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/BreakIteratorFactory.java @@ -8,7 +8,6 @@ package com.ibm.icu.text; import java.io.IOException; -import java.io.InputStream; import java.nio.ByteBuffer; import java.util.Locale; import java.util.MissingResourceException; @@ -111,9 +110,8 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim try { String typeKey = KIND_NAMES[kind]; String brkfname = rb.getStringWithFallback("boundaries/" + typeKey); - String rulesFileName = ICUResourceBundle.ICU_BUNDLE +ICUResourceBundle.ICU_BRKITR_NAME+ "/" + brkfname; - InputStream ruleStream = ICUData.getStream(rulesFileName); - bytes = ICUBinary.getByteBufferFromInputStream(ruleStream); + String rulesFileName = ICUData.ICU_BRKITR_NAME+ '/' + brkfname; + bytes = ICUBinary.getData(rulesFileName); } catch (Exception e) { throw new MissingResourceException(e.toString(),"",""); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DictionaryData.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DictionaryData.java index 5665fb16cf7..c6bfe1dd931 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/DictionaryData.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DictionaryData.java @@ -8,7 +8,6 @@ package com.ibm.icu.text; import java.io.IOException; -import java.io.InputStream; import java.nio.ByteBuffer; import com.ibm.icu.impl.Assert; @@ -45,9 +44,8 @@ final class DictionaryData { public static DictionaryMatcher loadDictionaryFor(String dictType) throws IOException { ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BRKITR_BASE_NAME); String dictFileName = rb.getStringWithFallback("dictionaries/" + dictType); - dictFileName = ICUResourceBundle.ICU_BUNDLE +ICUResourceBundle.ICU_BRKITR_NAME+ "/" + dictFileName; - InputStream is = ICUData.getStream(dictFileName); - ByteBuffer bytes = ICUBinary.getByteBufferFromInputStream(is); + dictFileName = ICUData.ICU_BRKITR_NAME + '/' + dictFileName; + ByteBuffer bytes = ICUBinary.getRequiredData(dictFileName); ICUBinary.readHeader(bytes, DATA_FORMAT_ID, null); int[] indexes = new int[IX_COUNT]; // TODO: read indexes[IX_STRING_TRIE_OFFSET] first, then read a variable-length indexes[] diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java index e85ce8f93cd..0745238c6e8 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java @@ -9,10 +9,12 @@ package com.ibm.icu.text; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import com.ibm.icu.impl.CharTrie; import com.ibm.icu.impl.ICUBinary; import com.ibm.icu.impl.Trie; +import com.ibm.icu.impl.ICUBinary.Authenticate; /** *

Internal class used for Rule Based Break Iterators

@@ -32,7 +34,20 @@ final class RBBIDataWrapper { CharTrie fTrie; String fRuleSource; int fStatusTable[]; - + + private boolean isBigEndian; + + static final int DATA_FORMAT = 0x42726b20; // "Brk " + static final int FORMAT_VERSION = 0x03010000; // 3.1 + + private static final class IsAcceptable implements Authenticate { + // @Override when we switch to Java 6 + public boolean isDataVersionAcceptable(byte version[]) { + return version[0] == (FORMAT_VERSION >>> 24); + } + } + private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable(); + // // Indexes to fields in the ICU4C style binary form of the RBBI Data Header // Used by the rule compiler when flattening the data. @@ -70,12 +85,12 @@ final class RBBIDataWrapper { // Index offsets to header fields of a state table // struct RBBIStateTable {... in the C version. // - final static int NUMSTATES = 0; - final static int ROWLEN = 2; - final static int FLAGS = 4; - final static int RESERVED_2 = 6; - final static int ROW_DATA = 8; - + static final int NUMSTATES = 0; + static final int ROWLEN = 2; + static final int FLAGS = 4; + //ivate static final int RESERVED_2 = 6; + private static final int ROW_DATA = 8; + // Bit selectors for the "FLAGS" field of the state table header // enum RBBIStateTableFlags in the C version. // @@ -153,18 +168,20 @@ final class RBBIDataWrapper { RBBIDataWrapper This = new RBBIDataWrapper(); - // Seek past the ICU data header. - // TODO: verify that the header looks good. - ICUBinary.skipBytes(bytes, 0x80); + ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE); + This.isBigEndian = bytes.order() == ByteOrder.BIG_ENDIAN; // Read in the RBBI data header... This.fHeader = new RBBIDataHeader(); This.fHeader.fMagic = bytes.getInt(); - This.fHeader.fVersion = bytes.getInt(); - This.fHeader.fFormatVersion[0] = (byte) (This.fHeader.fVersion >> 24); - This.fHeader.fFormatVersion[1] = (byte) (This.fHeader.fVersion >> 16); - This.fHeader.fFormatVersion[2] = (byte) (This.fHeader.fVersion >> 8); - This.fHeader.fFormatVersion[3] = (byte) (This.fHeader.fVersion); + // Read the same 4 bytes as an int and as a byte array: The data format could be + // the old fVersion=1 (TODO: probably not with a real ICU data header?) + // or the new fFormatVersion=3.x. + This.fHeader.fVersion = bytes.getInt(bytes.position()); + This.fHeader.fFormatVersion[0] = bytes.get(); + This.fHeader.fFormatVersion[1] = bytes.get(); + This.fHeader.fFormatVersion[2] = bytes.get(); + This.fHeader.fFormatVersion[3] = bytes.get(); This.fHeader.fLength = bytes.getInt(); This.fHeader.fCatCount = bytes.getInt(); This.fHeader.fFTable = bytes.getInt(); @@ -322,14 +339,20 @@ final class RBBIDataWrapper { ///CLOVER:OFF // Getters for fields from the state table header // - final static int getNumStates(short table[]) { - int hi = table[NUMSTATES]; - int lo = table[NUMSTATES+1]; - int val = (hi<<16) + (lo&0x0000ffff); - return val; + private int getStateTableNumStates(short table[]) { + if (isBigEndian) { + return (table[NUMSTATES] << 16) | (table[NUMSTATES+1] & 0xffff); + } else { + return (table[NUMSTATES+1] << 16) | (table[NUMSTATES] & 0xffff); + } } ///CLOVER:ON + int getStateTableFlags(short table[]) { + // This works for up to 15 flags bits. + return table[isBigEndian ? FLAGS + 1 : FLAGS]; + } + ///CLOVER:OFF /* Debug function to display the break iterator data. */ void dump() { @@ -395,7 +418,7 @@ final class RBBIDataWrapper { System.out.print("-"); } System.out.println(); - for (state=0; state< getNumStates(table); state++) { + for (state=0; state< getStateTableNumStates(table); state++) { dumpRow(table, state); } System.out.println(); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java index af0c131f600..20846fcc295 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java @@ -1,5 +1,5 @@ // -// Copyright (C) 2002-2009, International Business Machines Corporation and others. +// Copyright (C) 2002-2014, International Business Machines Corporation and others. // All Rights Reserved. // // @@ -16,6 +16,7 @@ import java.util.Map; import java.util.Set; import com.ibm.icu.impl.Assert; +import com.ibm.icu.impl.ICUBinary; import com.ibm.icu.impl.ICUDebug; class RBBIRuleBuilder { @@ -185,12 +186,8 @@ class RBBIRuleBuilder { // // Write out an ICU Data Header - // TODO: actually create a real header, rather than just a placeholder. - // The empty placeholder is ok for compile-and-go from within ICU4J. - // Replicating the ICU4C genbrk tool for building .brk resources would need a real header. // - byte[] ICUDataHeader = new byte[0x80]; - dos.write(ICUDataHeader); + ICUBinary.writeHeader(RBBIDataWrapper.DATA_FORMAT, RBBIDataWrapper.FORMAT_VERSION, 0, dos); // // Write out the RBBIDataHeader diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java index b0e08d78895..0dd194f386d 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java @@ -1200,7 +1200,7 @@ public class RuleBasedBreakIterator extends BreakIterator { int state = START_STATE; int row = fRData.getRowIndex(state); short category = 3; - short flagsState = stateTable[RBBIDataWrapper.FLAGS+1]; + int flagsState = fRData.getStateTableFlags(stateTable); int mode = RBBI_RUN; if ((flagsState & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) { category = 2; @@ -1373,7 +1373,7 @@ public class RuleBasedBreakIterator extends BreakIterator { int initialPosition = 0; int lookaheadResult = 0; boolean lookAheadHardBreak = - (stateTable[RBBIDataWrapper.FLAGS+1] & RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK) != 0; + (fRData.getStateTableFlags(stateTable) & RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK) != 0; // handlePrevious() never gets the rule status. // Flag the status as invalid; if the user ever asks for status, we will need @@ -1392,7 +1392,7 @@ public class RuleBasedBreakIterator extends BreakIterator { row = fRData.getRowIndex(state); category = 3; // TODO: obsolete? from the old start/run mode scheme? mode = RBBI_RUN; - if ((stateTable[RBBIDataWrapper.FLAGS+1] & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) { + if ((fRData.getStateTableFlags(stateTable) & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) { category = 2; mode = RBBI_START; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java b/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java index 888377b69e6..1f39a709747 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java @@ -11,7 +11,6 @@ package com.ibm.icu.text; import java.io.DataOutputStream; import java.io.IOException; -import java.io.InputStream; import java.io.LineNumberReader; import java.io.Reader; import java.nio.ByteBuffer; @@ -32,6 +31,7 @@ import java.util.regex.Pattern; import com.ibm.icu.impl.ICUBinary; import com.ibm.icu.impl.Trie2; import com.ibm.icu.impl.Trie2Writable; +import com.ibm.icu.impl.ICUBinary.Authenticate; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UCharacterCategory; import com.ibm.icu.lang.UProperty; @@ -2172,24 +2172,32 @@ public class SpoofChecker { } } + private static final int DATA_FORMAT = 0x43667520; // "Cfu " + private static final class IsAcceptable implements Authenticate { + // @Override when we switch to Java 6 + public boolean isDataVersionAcceptable(byte version[]) { + return version[0] == 1; + } + } + private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable(); - // getDefault() - Create a SpoofData instance that is built from - // the data baked into the default ICU data. + private static final class DefaultData { + private static SpoofData INSTANCE = null; + static { + try { + INSTANCE = new SpoofData(ICUBinary.getRequiredData("confusables.cfu")); + } catch (IOException ignored) { + } + } + } + + /** + * @return instance for Unicode standard data + */ static SpoofData getDefault() { - // TODO: Cache it. Lazy create, keep until cleanup. - SpoofData This = null; - try { - InputStream is = com.ibm.icu.impl.ICUData.getRequiredStream(com.ibm.icu.impl.ICUResourceBundle.ICU_BUNDLE - + "/confusables.cfu"); - This = new SpoofData(ICUBinary.getByteBufferFromInputStream(is)); - is.close(); - } - catch (IOException e) { - // Return null in this case. - } - return This; + return DefaultData.INSTANCE; } // SpoofChecker Data constructor for use from data builder. @@ -2200,9 +2208,7 @@ public class SpoofChecker { // Constructor for use when creating from prebuilt default data. // A ByteBuffer is what the ICU internal data loading functions provide. SpoofData(ByteBuffer bytes) throws java.io.IOException { - // Seek past the ICU data header. - // TODO: verify that the header looks good. - ICUBinary.skipBytes(bytes, 0x80); + ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE); bytes.mark(); readData(bytes); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/StringPrep.java b/icu4j/main/classes/core/src/com/ibm/icu/text/StringPrep.java index e2f6d3133fa..d2987af4a16 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/StringPrep.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/StringPrep.java @@ -14,8 +14,6 @@ import java.nio.ByteBuffer; import com.ibm.icu.impl.CharTrie; import com.ibm.icu.impl.ICUBinary; -import com.ibm.icu.impl.ICUData; -import com.ibm.icu.impl.ICUResourceBundle; import com.ibm.icu.impl.StringPrepDataReader; import com.ibm.icu.impl.UBiDiProps; import com.ibm.icu.lang.UCharacter; @@ -272,7 +270,10 @@ public final class StringPrep { */ public StringPrep(InputStream inputStream) throws IOException{ // TODO: Add a public constructor that takes ByteBuffer directly. - ByteBuffer bytes = ICUBinary.getByteBufferFromInputStream(inputStream); + this(ICUBinary.getByteBufferFromInputStream(inputStream)); + } + + private StringPrep(ByteBuffer bytes) throws IOException { StringPrepDataReader reader = new StringPrepDataReader(bytes); // read the indexes @@ -328,15 +329,10 @@ public final class StringPrep { } if (instance == null) { - InputStream stream = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/" - + PROFILE_NAMES[profile] + ".spp"); - if (stream != null) { + ByteBuffer bytes = ICUBinary.getRequiredData(PROFILE_NAMES[profile] + ".spp"); + if (bytes != null) { try { - try { - instance = new StringPrep(stream); - } finally { - stream.close(); - } + instance = new StringPrep(bytes); } catch (IOException e) { throw new ICUUncheckedIOException(e); } diff --git a/icu4j/main/tests/charset/src/com/ibm/icu/dev/test/charset/TestConversion.java b/icu4j/main/tests/charset/src/com/ibm/icu/dev/test/charset/TestConversion.java index 0e5f4415778..67443aa0991 100644 --- a/icu4j/main/tests/charset/src/com/ibm/icu/dev/test/charset/TestConversion.java +++ b/icu4j/main/tests/charset/src/com/ibm/icu/dev/test/charset/TestConversion.java @@ -1,9 +1,7 @@ /* ******************************************************************************* - * Copyright (C) 2002-2012, International Business Machines Corporation and * - * others. All Rights Reserved. * - ******************************************************************************* - * + * Copyright (C) 2002-2014, International Business Machines Corporation and + * others. All Rights Reserved. ******************************************************************************* */ @@ -216,7 +214,6 @@ public class TestConversion extends ModuleTest { private void FromUnicodeCase(ConversionCase cc) { - // create charset encoder for conversion test CharsetProviderICU provider = new CharsetProviderICU(); CharsetEncoder encoder = null; @@ -227,17 +224,21 @@ public class TestConversion extends ModuleTest { ? (Charset) provider.charsetForName(cc.charset.substring(1), "com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader()) : (Charset) provider.charsetForName(cc.charset); - encoder = (CharsetEncoder) charset.newEncoder(); - encoder.onMalformedInput(CodingErrorAction.REPLACE); - encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); - if (encoder instanceof CharsetEncoderICU) { - ((CharsetEncoderICU)encoder).setFallbackUsed(cc.fallbacks); - if (((CharsetEncoderICU)encoder).isFallbackUsed() != cc.fallbacks) { - errln("Fallback could not be set for " + cc.charset); + if (charset != null) { + encoder = (CharsetEncoder) charset.newEncoder(); + encoder.onMalformedInput(CodingErrorAction.REPLACE); + encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); + if (encoder instanceof CharsetEncoderICU) { + ((CharsetEncoderICU)encoder).setFallbackUsed(cc.fallbacks); + if (((CharsetEncoderICU)encoder).isFallbackUsed() != cc.fallbacks) { + errln("Fallback could not be set for " + cc.charset); + } } } - } catch (Exception e) { + encoder = null; + } + if (encoder == null) { if (cc.charset.charAt(0) == UNSUPPORTED_CHARSET_SYMBOL) { logln("Skipping test:(" + cc.charset.substring(1) + ") due to ICU Charset not supported at this time"); } else { @@ -245,7 +246,7 @@ public class TestConversion extends ModuleTest { } return; } - + // set the callback for the encoder if (cc.cbErrorAction != null) { if (cc.cbEncoder != null) { @@ -514,12 +515,16 @@ public class TestConversion extends ModuleTest { ? (Charset) provider.charsetForName(cc.charset.substring(1), "com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader()) : (Charset) provider.charsetForName(cc.charset); - decoder = (CharsetDecoder) charset.newDecoder(); - decoder.onMalformedInput(CodingErrorAction.REPLACE); - decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); - + if (charset != null) { + decoder = (CharsetDecoder) charset.newDecoder(); + decoder.onMalformedInput(CodingErrorAction.REPLACE); + decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); + } } catch (Exception e) { // TODO implement loading of test data. + decoder = null; + } + if (decoder == null) { if (cc.charset.charAt(0) == UNSUPPORTED_CHARSET_SYMBOL) { logln("Skipping test:(" + cc.charset.substring(1) + ") due to ICU Charset not supported at this time"); } else { @@ -899,12 +904,12 @@ public class TestConversion extends ModuleTest { //checking for converter that are not supported at this point try{ - if(charset.name()=="BOCU-1" ||charset.name()== "SCSU"|| charset.name()=="lmbcs1" || charset.name()== "lmbcs2" || + if(charset==null || + charset.name()=="BOCU-1" ||charset.name()== "SCSU"|| charset.name()=="lmbcs1" || charset.name()== "lmbcs2" || charset.name()== "lmbcs3" || charset.name()== "lmbcs4" || charset.name()=="lmbcs5" || charset.name()=="lmbcs6" || charset.name()== "lmbcs8" || charset.name()=="lmbcs11" || charset.name()=="lmbcs16" || charset.name()=="lmbcs17" || charset.name()=="lmbcs18"|| charset.name()=="lmbcs19"){ - - logln("Converter not supported at this point :" +charset.displayName()); + logln("Converter not supported at this point :" + cc.charset); return; }