ICU-10939 ICU4J optionally read data from .dat and .res, from ICUConfig-ured dataPath; make ICU4J support little-endian binary data files

X-SVN-Rev: 36106
This commit is contained in:
Markus Scherer 2014-07-31 18:46:54 +00:00
parent 96835f80a5
commit e87758a436
28 changed files with 1164 additions and 649 deletions

View file

@ -1,11 +1,9 @@
/**
*******************************************************************************
* Copyright (C) 2006-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
* Copyright (C) 2006-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
*******************************************************************************
*/
*/
package com.ibm.icu.charset;
@ -253,14 +251,14 @@ public class CharsetCallback {
while (i < length) {
valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
valueStringLength += itou(valueString, valueStringLength, buffer[i++], 16, 4);
}
} else {
if (((String)context).equals(ESCAPE_JAVA)) {
while (i < length) {
valueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */
valueString[valueStringLength++] = UNICODE_U_LOW_CODEPOINT; /* adding u */
valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
valueStringLength += itou(valueString, valueStringLength, buffer[i++], 16, 4);
}
} else if (((String)context).equals(ESCAPE_C)) {
valueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */
@ -270,7 +268,7 @@ public class CharsetCallback {
valueStringLength = itou(valueString, valueStringLength, cp, 16, 8);
} else {
valueString[valueStringLength++] = UNICODE_U_LOW_CODEPOINT; /* adding u */
valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
valueStringLength += itou(valueString, valueStringLength, buffer[0], 16, 4);
}
} else if (((String)context).equals(ESCAPE_XML_DEC)) {
valueString[valueStringLength++] = UNICODE_AMP_CODEPOINT; /* adding & */
@ -278,7 +276,7 @@ public class CharsetCallback {
if (length == 2) {
valueStringLength += itou(valueString, valueStringLength, cp, 10, 0);
} else {
valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 10, 0);
valueStringLength += itou(valueString, valueStringLength, buffer[0], 10, 0);
}
valueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
} else if (((String)context).equals(ESCAPE_XML_HEX)) {
@ -288,7 +286,7 @@ public class CharsetCallback {
if (length == 2) {
valueStringLength += itou(valueString, valueStringLength, cp, 16, 0);
} else {
valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 0);
valueStringLength += itou(valueString, valueStringLength, buffer[0], 16, 0);
}
valueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
} else if (((String)context).equals(ESCAPE_UNICODE)) {
@ -298,7 +296,7 @@ public class CharsetCallback {
if (length == 2) {
valueStringLength += itou(valueString, valueStringLength,cp, 16, 4);
} else {
valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
valueStringLength += itou(valueString, valueStringLength, buffer[0], 16, 4);
}
valueString[valueStringLength++] = UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */
} else if (((String)context).equals(ESCAPE_CSS2)) {
@ -311,7 +309,7 @@ public class CharsetCallback {
while (i < length) {
valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
valueStringLength += itou(valueString, valueStringLength, buffer[i++], 16, 4);
}
}
}

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
* Copyright (C) 2008-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
* Copyright (C) 2008-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.charset;
@ -271,13 +271,13 @@ class CharsetISO2022 extends CharsetICU {
char[] table;
int value;
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
if (c >= 0x10000 && (sharedData.mbcs.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
if (c >= 0x10000 && !sharedData.mbcs.hasSupplementary()) {
return 0;
}
/* convert the Unicode code point in c into codepage bytes */
table = sharedData.mbcs.fromUnicodeTable;
/* get the byte for the output */
value = CharsetMBCS.MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeBytes, c);
value = CharsetMBCS.MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeChars, c);
/* get the byte for the output */
retval[0] = value & 0xff;
if (value >= 0xf00) {

View file

@ -10,12 +10,9 @@
package com.ibm.icu.charset;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
final class UConverterAlias {
static final int UNNORMALIZED = 0;
@ -115,13 +112,12 @@ final class UConverterAlias {
return (alias.length() != 0);
}
private static final String CNVALIAS_DATA_FILE_NAME = ICUResourceBundle.ICU_BUNDLE + "/cnvalias.icu";
private static final String CNVALIAS_DATA_FILE_NAME = "cnvalias.icu";
private static final synchronized boolean haveAliasData()
throws IOException{
boolean needInit;
// agljport:todo umtx_lock(NULL);
needInit = gAliasData == null;
/* load converter alias data from file if necessary */
@ -129,10 +125,8 @@ final class UConverterAlias {
ByteBuffer data = null;
int[] tableArray = null;
int tableStart;
//byte[] reservedBytes = null;
InputStream i = ICUData.getRequiredStream(CNVALIAS_DATA_FILE_NAME);
ByteBuffer b = ICUBinary.getByteBufferFromInputStream(i);
ByteBuffer b = ICUBinary.getRequiredData(CNVALIAS_DATA_FILE_NAME);
UConverterAliasDataReader reader = new UConverterAliasDataReader(b);
tableArray = reader.readToc(offsetsCount);
@ -160,21 +154,10 @@ final class UConverterAlias {
if (gOptionTable[0] != STD_NORMALIZED) {
throw new IOException("Unsupported alias normalization");
}
// agljport:todo umtx_lock(NULL);
if (gAliasData == null) {
gAliasData = data;
data = null;
// agljport:fix ucln_common_registerCleanup(UCLN_COMMON_IO,
// io_cleanup);
}
// agljport:todo umtx_unlock(NULL);
/* if a different thread set it first, then close the extra data */
if (data != null) {
// agljport:fix udata_close(data); /* NULL if it was set
// correctly */
}
}

View file

@ -9,8 +9,14 @@ package com.ibm.icu.charset;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
import com.ibm.icu.charset.CharsetMBCS.MBCSHeader;
import com.ibm.icu.charset.CharsetMBCS.MBCSToUFallback;
import com.ibm.icu.charset.CharsetMBCS.UConverterMBCSTable;
import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.InvalidFormatException;
/**
* ucnvmbcs.h
@ -395,9 +401,17 @@ import com.ibm.icu.impl.ICUBinary;
* Indexes and lengths stored in the fromUTableValues[].
*/
final class UConverterDataReader implements ICUBinary.Authenticate {
final class UConverterDataReader {
//private final static boolean debug = ICUDebug.enabled("UConverterDataReader");
/**
 * Format-version check handed to ICUBinary.readHeader() by this reader.
 * Accepts data only when the first format version byte (the major version) is 6;
 * the remaining version bytes are not examined here.
 */
private static final class IsAcceptable implements ICUBinary.Authenticate {
// @Override when we switch to Java 6
public boolean isDataVersionAcceptable(byte formatVersion[]) {
return formatVersion[0] == 6;
}
}
private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
/*
* UConverterDataReader(UConverterDataReader r)
{
@ -405,10 +419,8 @@ final class UConverterDataReader implements ICUBinary.Authenticate {
unicodeVersion = r.unicodeVersion;
}
*/
/* the number bytes read from the buffer */
int bytesRead = 0;
/* the number of bytes read for static data */
int staticDataBytesRead = 0;
/** The buffer position after the static data. */
private int posAfterStaticData;
/**
* <p>Protected constructor.</p>
@ -420,7 +432,7 @@ final class UConverterDataReader implements ICUBinary.Authenticate {
//if(debug) System.out.println("Bytes in buffer " + bytes.remaining());
byteBuffer = bytes;
/*unicodeVersion = */ICUBinary.readHeader(byteBuffer, DATA_FORMAT_ID, this);
/*unicodeVersion = */ICUBinary.readHeader(byteBuffer, DATA_FORMAT_ID, IS_ACCEPTABLE);
//if(debug) System.out.println("Bytes left in byteBuffer " + byteBuffer.remaining());
}
@ -429,95 +441,137 @@ final class UConverterDataReader implements ICUBinary.Authenticate {
protected void readStaticData(UConverterStaticData sd) throws IOException
{
int bRead = 0;
sd.structSize = byteBuffer.getInt();
bRead +=4;
byte[] name = new byte[UConverterConstants.MAX_CONVERTER_NAME_LENGTH];
byteBuffer.get(name);
bRead +=name.length;
sd.name = new String(name, 0, name.length);
sd.name = new String(name, "US-ASCII");
sd.codepage = byteBuffer.getInt();
bRead +=4;
sd.platform = byteBuffer.get();
bRead++;
sd.conversionType = byteBuffer.get();
bRead++;
sd.minBytesPerChar = byteBuffer.get();
bRead++;
sd.maxBytesPerChar = byteBuffer.get();
bRead++;
byteBuffer.get(sd.subChar);
bRead += sd.subChar.length;
sd.subCharLen = byteBuffer.get();
bRead++;
sd.hasToUnicodeFallback = byteBuffer.get();
bRead++;
sd.hasFromUnicodeFallback = byteBuffer.get();
bRead++;
sd.unicodeMask = (short)(byteBuffer.get() & 0xff);
bRead++;
sd.subChar1 = byteBuffer.get();
bRead++;
byteBuffer.get(sd.reserved);
bRead += sd.reserved.length;
staticDataBytesRead = bRead;
bytesRead += bRead;
posAfterStaticData = byteBuffer.position();
}
/**
 * Returns how far the buffer position has advanced beyond posAfterStaticData,
 * that is, the current byteBuffer position minus the position recorded in that field.
 */
int bytesReadAfterStaticData() {
return byteBuffer.position() - posAfterStaticData;
}
protected void readMBCSHeader(CharsetMBCS.MBCSHeader h) throws IOException
{
byteBuffer.get(h.version);
bytesRead += h.version.length;
h.countStates = byteBuffer.getInt();
bytesRead+=4;
h.countToUFallbacks = byteBuffer.getInt();
bytesRead+=4;
h.offsetToUCodeUnits = byteBuffer.getInt();
bytesRead+=4;
h.offsetFromUTable = byteBuffer.getInt();
bytesRead+=4;
h.offsetFromUBytes = byteBuffer.getInt();
bytesRead+=4;
h.flags = byteBuffer.getInt();
bytesRead+=4;
h.fromUBytesLength = byteBuffer.getInt();
bytesRead+=4;
if (h.version[0] == 5 && h.version[1] >= 3) {
h.options = byteBuffer.getInt();
bytesRead+=4;
if ((h.options & CharsetMBCS.MBCS_OPT_NO_FROM_U) != 0) {
h.fullStage2Length = byteBuffer.getInt();
bytesRead+=4;
}
}
}
protected void readMBCSTable(int[][] stateTableArray, CharsetMBCS.MBCSToUFallback[] toUFallbacksArray, char[] unicodeCodeUnitsArray, char[] fromUnicodeTableArray, byte[] fromUnicodeBytesArray) throws IOException
protected void readMBCSTable(MBCSHeader header, UConverterMBCSTable mbcsTable) throws IOException
{
int i, j;
for(i = 0; i < stateTableArray.length; ++i){
for(j = 0; j < stateTableArray[i].length; ++j){
stateTableArray[i][j] = byteBuffer.getInt();
bytesRead+=4;
IntBuffer intBuffer = byteBuffer.asIntBuffer();
mbcsTable.countStates = (byte) header.countStates;
mbcsTable.stateTable = new int[header.countStates][256];
int i;
for(i = 0; i < header.countStates; ++i) {
intBuffer.get(mbcsTable.stateTable[i]);
}
mbcsTable.countToUFallbacks = header.countToUFallbacks;
mbcsTable.toUFallbacks = new MBCSToUFallback[header.countToUFallbacks];
for(i = 0; i < header.countToUFallbacks; ++i) {
int offset = intBuffer.get();
int codePoint = intBuffer.get();
mbcsTable.toUFallbacks[i] = new MBCSToUFallback(offset, codePoint);
}
// Skip as many bytes as we have read from the IntBuffer.
int length = intBuffer.position() * 4;
ICUBinary.skipBytes(byteBuffer, length);
// Consider leaving some large arrays as CharBuffer/IntBuffer rather than
// reading them into Java arrays, to reduce initialization time and memory usage,
// at the cost of some performance.
// For example: unicodeCodeUnits, fromUnicodeTable, fromUnicodeInts.
// Take care not to modify the buffer contents for swaplfnl.
CharBuffer charBuffer = byteBuffer.asCharBuffer();
length = header.offsetFromUTable - header.offsetToUCodeUnits;
assert (length & 1) == 0;
mbcsTable.unicodeCodeUnits = new char[length / 2];
charBuffer.get(mbcsTable.unicodeCodeUnits);
// Skip as many bytes as we have read from the CharBuffer.
ICUBinary.skipBytes(byteBuffer, length);
length = header.offsetFromUBytes - header.offsetFromUTable;
assert (length & 1) == 0;
int fromUTableCharsLength;
if (mbcsTable.outputType == CharsetMBCS.MBCS_OUTPUT_1) {
// single-byte table stage1 + stage2
fromUTableCharsLength = length / 2;
} else if (mbcsTable.hasSupplementary()) {
// stage1 for Unicode limit 0x110000 >> 10
fromUTableCharsLength = 0x440;
} else {
// stage1 for BMP limit 0x10000 >> 10
fromUTableCharsLength = 0x40;
}
mbcsTable.fromUnicodeTable = new char[fromUTableCharsLength];
charBuffer.get(mbcsTable.fromUnicodeTable);
if (mbcsTable.outputType != CharsetMBCS.MBCS_OUTPUT_1) {
// Read both stage1 and stage2 together into an int[] array.
// Keeping the short stage1 in the array avoids offsetting at runtime.
// The stage1 part of this array will not be used.
assert (length & 3) == 0;
mbcsTable.fromUnicodeTableInts = new int[length / 4];
byteBuffer.asIntBuffer().get(mbcsTable.fromUnicodeTableInts);
}
// Skip as many bytes as are in stage1 + stage2.
ICUBinary.skipBytes(byteBuffer, length);
mbcsTable.fromUBytesLength = header.fromUBytesLength;
boolean noFromU = ((header.options & CharsetMBCS.MBCS_OPT_NO_FROM_U) != 0);
if (!noFromU) {
switch (mbcsTable.outputType) {
case CharsetMBCS.MBCS_OUTPUT_1:
case CharsetMBCS.MBCS_OUTPUT_2:
case CharsetMBCS.MBCS_OUTPUT_2_SISO:
case CharsetMBCS.MBCS_OUTPUT_3_EUC:
mbcsTable.fromUnicodeChars = new char[header.fromUBytesLength / 2];
byteBuffer.asCharBuffer().get(mbcsTable.fromUnicodeChars);
ICUBinary.skipBytes(byteBuffer, header.fromUBytesLength & ~1);
break;
case CharsetMBCS.MBCS_OUTPUT_3:
case CharsetMBCS.MBCS_OUTPUT_4_EUC:
mbcsTable.fromUnicodeBytes = new byte[header.fromUBytesLength];
byteBuffer.get(mbcsTable.fromUnicodeBytes);
break;
case CharsetMBCS.MBCS_OUTPUT_4:
mbcsTable.fromUnicodeInts = new int[header.fromUBytesLength / 4];
byteBuffer.asIntBuffer().get(mbcsTable.fromUnicodeInts);
ICUBinary.skipBytes(byteBuffer, header.fromUBytesLength & ~3);
break;
default:
// Cannot occur, caller checked already.
assert false;
}
}
for(i = 0; i < toUFallbacksArray.length; ++i) {
toUFallbacksArray[i].offset = byteBuffer.getInt();
bytesRead+=4;
toUFallbacksArray[i].codePoint = byteBuffer.getInt();
bytesRead+=4;
}
for(i = 0; i < unicodeCodeUnitsArray.length; ++i){
unicodeCodeUnitsArray[i] = byteBuffer.getChar();
bytesRead+=2;
}
for(i = 0; i < fromUnicodeTableArray.length; ++i){
fromUnicodeTableArray[i] = byteBuffer.getChar();
bytesRead+=2;
}
for(i = 0; i < fromUnicodeBytesArray.length; ++i){
fromUnicodeBytesArray[i] = byteBuffer.get();
bytesRead++;
} else {
// Optional utf8Friendly mbcsIndex -- _MBCSHeader.version 4.3 (ICU 3.8) and higher.
// Needed for reconstituting omitted data.
mbcsTable.mbcsIndex = byteBuffer.asCharBuffer();
}
}
@ -527,60 +581,33 @@ final class UConverterDataReader implements ICUBinary.Authenticate {
StringBuilder name = new StringBuilder();
while((c = (char)byteBuffer.get()) != 0){
name.append(c);
bytesRead++;
}
bytesRead++/*for null terminator*/;
return name.toString();
}
//protected int[] readExtIndexes(int skip) throws IOException
protected ByteBuffer readExtIndexes(int skip) throws IOException
protected ByteBuffer readExtIndexes(int skip) throws IOException, InvalidFormatException
{
ICUBinary.skipBytes(byteBuffer, skip);
int n = byteBuffer.getInt();
bytesRead+=4;
int[] indexes = new int[n];
indexes[0] = n;
for(int i = 1; i < n; ++i) {
indexes[i] = byteBuffer.getInt();
bytesRead+=4;
ByteBuffer b = ICUBinary.sliceWithOrder(byteBuffer);
int lengthOfIndexes = b.getInt(0);
if (lengthOfIndexes < 32) {
throw new InvalidFormatException();
}
//return indexes;
ByteBuffer b = ByteBuffer.allocate(indexes[31]);
for(int i = 0; i < n; ++i) {
b.putInt(indexes[i]);
}
int len = b.remaining();
byteBuffer.get(b.array(), b.position(), len);
bytesRead += len;
int numBytesExtensionStructure = b.getInt(31 * 4);
b.limit(numBytesExtensionStructure);
ICUBinary.skipBytes(byteBuffer, numBytesExtensionStructure);
return b;
}
/*protected byte[] readExtTables(int n) throws IOException
{
byte[] tables = new byte[n];
int len = byteBuffer.get(tables);
if(len==-1){
throw new IOException("Read failed");
}
bytesRead += len;
return tables;
}*/
byte[] getDataFormatVersion(){
return DATA_FORMAT_VERSION;
}
/**
* Inherited method
* Data formatVersion 6.1 and higher has a unicodeMask.
*/
public boolean isDataVersionAcceptable(byte version[]){
return version[0] == DATA_FORMAT_VERSION[0];
/**
 * Tells whether the data format is version 6.1 or higher.
 * Reads the two bytes at absolute buffer indexes 16 and 17 (used here as the
 * major and minor format version) without moving the buffer position.
 *
 * @return true if the major version is greater than 6,
 *         or if it is exactly 6 and the minor version byte is not 0
 */
boolean dataFormatHasUnicodeMask() {
// Mask to an unsigned value so that a byte >= 0x80 compares as a large version.
int formatVersion0 = byteBuffer.get(16) & 0xff;
return formatVersion0 > 6 || (formatVersion0 == 6 && byteBuffer.get(17) != 0);
}
/* byte[] getUnicodeVersion(){
return unicodeVersion;
}*/
// private data members -------------------------------------------------
/**
@ -597,5 +624,4 @@ final class UConverterDataReader implements ICUBinary.Authenticate {
*/
// DATA_FORMAT_ID_ values taken from icu4c isCnvAcceptable (ucnv_bld.c)
private static final int DATA_FORMAT_ID = 0x636e7674; // dataFormat="cnvt"
private static final byte DATA_FORMAT_VERSION[] = {(byte)0x6};
}

View file

@ -12,12 +12,11 @@
package com.ibm.icu.impl.coll;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.MissingResourceException;
import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
/**
* Collation root provider.
@ -42,20 +41,20 @@ public final class CollationRoot { // purely static
}
static { // Corresponds to C++ load() function.
CollationTailoring t = new CollationTailoring(null);
// TODO: Optionally load from a .dat file or stand-alone .icu file.
String path = ICUResourceBundle.ICU_BUNDLE + "/coll/ucadata.icu";
InputStream is = ICUData.getRequiredStream(path);
CollationTailoring t = null;
RuntimeException e2 = null;
try {
CollationDataReader.read(null, ICUBinary.getByteBufferFromInputStream(is), t);
ByteBuffer bytes = ICUBinary.getRequiredData("coll/ucadata.icu");
CollationTailoring t2 = new CollationTailoring(null);
CollationDataReader.read(null, bytes, t2);
// Keep t=null until after the root data has been read completely.
// Otherwise we would set a non-null root object if the data reader throws an exception.
t = t2;
} catch(IOException e) {
t = null;
e2 = new MissingResourceException(
"IOException while reading CLDR root data",
"CollationRoot", path);
"CollationRoot", ICUData.ICU_BUNDLE + "/coll/ucadata.icu");
} catch(RuntimeException e) {
t = null;
e2 = e;
}
rootSingleton = t;

View file

@ -3,7 +3,7 @@
#* Copyright (C) 2008-2014, International Business Machines Corporation and *
#* others. All Rights Reserved. *
#*******************************************************************************
#* This is the properties contains ICU runtime configuration
#* This is the properties file which contains ICU runtime configuration.
#*
#
@ -20,6 +20,7 @@ com.ibm.icu.util.TimeZone.DefaultTimeZoneType = ICU
com.ibm.icu.text.MessagePattern.ApostropheMode = DOUBLE_OPTIONAL
#
# [Internal Use Only]
# By default, DecimalFormat uses some internal equivalent character
# data in addition to ones in DecimalFormatSymbols for parsing
# decimal/grouping separators. When this property is true,
@ -29,8 +30,18 @@ com.ibm.icu.text.MessagePattern.ApostropheMode = DOUBLE_OPTIONAL
# @internal
com.ibm.icu.text.DecimalFormat.SkipExtendedSeparatorParsing = false
# File system path where ICU looks for binary data files.
# If not empty, then ICU looks for binary data files before looking for data on the classpath.
# This string may contain multiple paths, see File.pathSeparatorChar.
# Spaces (U+0020) around each path are trimmed away. Empty paths are ignored.
# There may be individual files, for example, zoneinfo64.res,
# or ICU4C .dat package files, for example, collation.dat or icudt54l.dat.
# Each ICU data file may contain little-endian or big-endian data.
# Each ICU data file's charset must be ASCII. (Platform type 'l' or 'b' but not 'e'.)
# @draft ICU 54
com.ibm.icu.impl.ICUBinary.dataPath =
#
#
# [Internal Use Only]
# Disable resource path scan for building full locale name list
# at run time.

View file

@ -7,17 +7,262 @@
package com.ibm.icu.impl;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.List;
import java.util.MissingResourceException;
import com.ibm.icu.util.ICUUncheckedIOException;
import com.ibm.icu.util.VersionInfo;
public final class ICUBinary
{
public final class ICUBinary {
/**
 * Reads the ICU .dat package file format.
 * Most methods do not modify the ByteBuffer in any way,
 * not even its position or other state.
 * The package layout used here: after the data header there is an item count (int),
 * followed by count pairs of (nameOffset, dataOffset) ints, both relative to the
 * position after the header.
 */
private static final class DatPackageReader {
    /**
     * .dat package data format ID "CmnD".
     */
    private static final int DATA_FORMAT = 0x436d6e44;

    /** Accepts only .dat packages whose major format version is 1. */
    private static final class IsAcceptable implements Authenticate {
        // @Override when we switch to Java 6
        public boolean isDataVersionAcceptable(byte version[]) {
            return version[0] == 1;
        }
    }
    private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();

    /**
     * Checks that the ByteBuffer contains a valid, usable ICU .dat package.
     * Moves the buffer position from 0 to after the data header.
     *
     * @param bytes the whole .dat file, positioned at 0
     * @return true if the header, the item count and the first and last item names
     *         look plausible; false otherwise (never throws for truncated or corrupt input
     *         that fails these checks)
     */
    private static boolean validate(ByteBuffer bytes) {
        try {
            readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
        } catch (IOException ignored) {
            return false;
        }
        // A truncated file may end right after the header; do not read past the end.
        if (bytes.remaining() < 4) {
            return false;
        }
        int count = bytes.getInt(bytes.position());  // Do not move the position.
        if (count <= 0) {
            return false;
        }
        // For each item, there is one ToC entry (8 bytes) and a name string
        // and a data item of at least 16 bytes.
        // (We assume no data item duplicate elimination for now.)
        // Compute in long: a corrupt count could overflow int arithmetic
        // and make an invalid package pass this check.
        long minLength = bytes.position() + 4L + count * (8L + 16);
        if (minLength > bytes.capacity()) {
            return false;
        }
        if (!startsWithPackageName(bytes, getNameOffset(bytes, 0)) ||
                !startsWithPackageName(bytes, getNameOffset(bytes, count - 1))) {
            return false;
        }
        return true;
    }

    /**
     * Tells whether the bytes at the given absolute index start with the ICU package name
     * (with either endianness letter) followed by '/'.
     *
     * @param bytes the package bytes
     * @param start absolute index of an item name; may be out of range for corrupt data,
     *              in which case this method returns false
     */
    private static boolean startsWithPackageName(ByteBuffer bytes, int start) {
        // Compare all but the trailing 'b' or 'l' which depends on the platform.
        int length = ICUData.PACKAGE_NAME.length() - 1;
        // We read length prefix bytes, the endianness letter, and the '/'.
        // Reject offsets from corrupt data rather than throwing IndexOutOfBoundsException.
        if (start < 0 || start > bytes.limit() - (length + 2)) {
            return false;
        }
        for (int i = 0; i < length; ++i) {
            if (bytes.get(start + i) != ICUData.PACKAGE_NAME.charAt(i)) {
                return false;
            }
        }
        // Check for 'b' or 'l' followed by '/'.
        byte c = bytes.get(start + length++);
        if ((c != 'b' && c != 'l') || bytes.get(start + length) != '/') {
            return false;
        }
        return true;
    }

    /**
     * Looks up one item in the package by binary search over the item names.
     *
     * @param bytes the package bytes, positioned after the data header
     * @param key the item path without the package-name prefix
     * @return a slice covering the item's data, or null if there is no such item
     */
    private static ByteBuffer getData(ByteBuffer bytes, CharSequence key) {
        int base = bytes.position();
        int count = bytes.getInt(base);

        // Do a binary search for the key.
        int start = 0;
        int limit = count;
        while (start < limit) {
            int mid = (start + limit) >>> 1;
            int nameOffset = getNameOffset(bytes, mid);
            // Skip "icudt54b/".
            nameOffset += ICUData.PACKAGE_NAME.length() + 1;
            int result = compareKeys(key, bytes, nameOffset);
            if (result < 0) {
                limit = mid;
            } else if (result > 0) {
                start = mid + 1;
            } else {
                // We found it!
                // Work on a duplicate so that the shared package buffer's state is untouched.
                ByteBuffer data = bytes.duplicate();
                data.position(getDataOffset(bytes, mid));
                data.limit(getDataOffset(bytes, mid + 1));
                return ICUBinary.sliceWithOrder(data);
            }
        }
        return null;  // Not found or table is empty.
    }

    /**
     * Returns the absolute buffer index of the name of the index-th item.
     */
    private static int getNameOffset(ByteBuffer bytes, int index) {
        int base = bytes.position();
        assert 0 <= index && index < bytes.getInt(base);  // count
        // The count integer (4 bytes)
        // is followed by count (nameOffset, dataOffset) integer pairs (8 bytes per pair).
        return base + bytes.getInt(base + 4 + index * 8);
    }

    /**
     * Returns the absolute buffer index of the data of the index-th item,
     * or the buffer capacity when index equals the item count
     * (the limit of the last data item).
     */
    private static int getDataOffset(ByteBuffer bytes, int index) {
        int base = bytes.position();
        int count = bytes.getInt(base);
        if (index == count) {
            // Return the limit of the last data item.
            return bytes.capacity();
        }
        assert 0 <= index && index < count;
        // The count integer (4 bytes)
        // is followed by count (nameOffset, dataOffset) integer pairs (8 bytes per pair).
        // The dataOffset follows the nameOffset (skip another 4 bytes).
        return base + bytes.getInt(base + 4 + 4 + index * 8);
    }
}
/**
 * One entry found on the ICU data path: either a single data file,
 * or a whole .dat package that has already been mapped and validated.
 * Exactly one of {@link #path} and {@link #pkgBytes} is non-null.
 */
private static final class DataFile {
    /** Item path relative to the data path root, using '/' separators. */
    public final String itemPath;
    /**
     * null if a .dat package.
     */
    public final File path;
    /**
     * .dat package bytes, or null if not a .dat package.
     * position() is after the header.
     * Do not modify the position or other state, for thread safety.
     */
    public final ByteBuffer pkgBytes;

    /** Creates an entry for a single data file. */
    public DataFile(String item, File path) {
        itemPath = item;
        this.path = path;
        pkgBytes = null;
    }

    /** Creates an entry for a .dat package. */
    public DataFile(String item, ByteBuffer bytes) {
        itemPath = item;
        path = null;
        pkgBytes = bytes;
    }

    /**
     * Returns the file path for a single file, or the item path for a .dat package
     * (which has no File and would otherwise cause a NullPointerException here).
     */
    @Override
    public String toString() {
        return path != null ? path.toString() : itemPath;
    }
}
/**
 * Data files and .dat packages found on the configured data path.
 * Filled by the static initializer below and searched in list order by getDataFromFile().
 * Stays empty when no data path is configured.
 */
private static final List<DataFile> icuDataFiles = new ArrayList<DataFile>();
static {
// Normally com.ibm.icu.impl.ICUBinary.dataPath.
String dataPath = ICUConfig.get(ICUBinary.class.getName() + ".dataPath");
// A missing configuration value means that no file system lookup is done.
if (dataPath != null) {
addDataFilesFromPath(dataPath, icuDataFiles);
}
}
/**
 * Splits a data path at File.pathSeparatorChar and collects the data files
 * found under each non-empty location.
 * Each location is trimmed, and one trailing File.separator is removed.
 *
 * @param dataPath one or more file system paths, separated by File.pathSeparatorChar
 * @param files the list that receives the files that were found
 */
private static void addDataFilesFromPath(String dataPath, List<DataFile> files) {
    // Split the path and find files in each location.
    // This splitting code avoids the regex pattern compilation in String.split()
    // and its array allocation.
    // (There is no simple by-character split()
    // and the StringTokenizer "is discouraged in new code".)
    int pathStart = 0;
    while (pathStart < dataPath.length()) {
        int sepIndex = dataPath.indexOf(File.pathSeparatorChar, pathStart);
        int pathLimit;
        if (sepIndex >= 0) {
            pathLimit = sepIndex;
        } else {
            pathLimit = dataPath.length();
        }
        String path = dataPath.substring(pathStart, pathLimit).trim();
        if (path.endsWith(File.separator)) {
            path = path.substring(0, path.length() - 1);
        }
        if (path.length() != 0) {
            // Add to the caller's list, not to the static icuDataFiles,
            // so that the files parameter is honored.
            addDataFilesFromFolder(new File(path), new StringBuilder(), files);
        }
        if (sepIndex < 0) {
            break;
        }
        pathStart = sepIndex + 1;
    }
}
/**
 * Recursively collects the data files below a folder.
 * Files ending in ".txt" are skipped, ".dat" files are mapped and added only if they
 * validate as ICU packages, and every other file is added as a single data file.
 *
 * @param folder the folder to enumerate
 * @param itemPath the ICU item path of the folder (empty for a data path root);
 *                 used as scratch space and restored to its incoming length per entry
 * @param dataFiles the list that receives the files that were found
 */
private static void addDataFilesFromFolder(File folder, StringBuilder itemPath,
        List<DataFile> dataFiles) {
    File[] entries = folder.listFiles();
    if (entries == null || entries.length == 0) {
        return;
    }
    int prefixLength = itemPath.length();
    if (prefixLength > 0) {
        // Item paths always use the ICU separator '/', never File.separatorChar,
        // so that they match the paths that ICU code asks for.
        itemPath.append('/');
        ++prefixLength;
    }
    for (int i = 0; i < entries.length; ++i) {
        File entry = entries[i];
        String name = entry.getName();
        if (!name.endsWith(".txt")) {
            itemPath.append(name);
            if (entry.isDirectory()) {
                // TODO: Within a folder, put all single files before all .dat packages?
                addDataFilesFromFolder(entry, itemPath, dataFiles);
            } else if (!name.endsWith(".dat")) {
                dataFiles.add(new DataFile(itemPath.toString(), entry));
            } else {
                ByteBuffer pkgBytes = mapFile(entry);
                if (pkgBytes != null && DatPackageReader.validate(pkgBytes)) {
                    dataFiles.add(new DataFile(itemPath.toString(), pkgBytes));
                }
            }
            // Drop this entry's name again before looking at the next sibling.
            itemPath.setLength(prefixLength);
        }
    }
}
/**
 * Compares a key of known length with a NUL-terminated key stored in a buffer. (ASCII)
 *
 * @param key the input key
 * @param bytes the buffer that holds the table key
 * @param offset absolute buffer index of the first byte of the table key
 * @return 0 if the keys are equal, a negative value if key sorts before the table key,
 *         a positive value if key sorts after it
 */
static int compareKeys(CharSequence key, ByteBuffer bytes, int offset) {
    int index = 0;
    while (true) {
        int tableChar = bytes.get(offset + index);
        boolean keyEnded = index == key.length();
        if (tableChar == 0) {
            // Table key ended: equal if the key ended too, otherwise the key is longer.
            return keyEnded ? 0 : 1;
        }
        if (keyEnded) {
            // The key is a proper prefix of the table key.
            return -1;
        }
        int delta = key.charAt(index) - tableChar;
        if (delta != 0) {
            return delta;
        }
        ++index;
    }
}
// public inner interface ------------------------------------------------
/**
* Special interface for data authentication
*/
@ -34,10 +279,131 @@ public final class ICUBinary
// public methods --------------------------------------------------------
/**
 * Loads an ICU binary data file and returns it as a ByteBuffer.
 * The buffer contents are normally read-only, but its position etc. can be modified.
 * Delegates to the four-argument getData() with no class loader, no resource name,
 * and required=false.
 *
 * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu".
 * @return The data as a read-only ByteBuffer,
 * or null if the resource could not be found.
 */
public static ByteBuffer getData(String itemPath) {
return getData(null, null, itemPath, false);
}
/**
 * Loads an ICU binary data file and returns it as a ByteBuffer.
 * The buffer contents are normally read-only, but its position etc. can be modified.
 * Delegates to the four-argument getData() with required=false.
 *
 * @param loader Used for loader.getResourceAsStream() unless the data is found elsewhere.
 * @param resourceName Resource name for use with the loader.
 * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu".
 * @return The data as a read-only ByteBuffer,
 * or null if the resource could not be found.
 */
public static ByteBuffer getData(ClassLoader loader, String resourceName, String itemPath) {
return getData(loader, resourceName, itemPath, false);
}
/**
 * Loads an ICU binary data file and returns it as a ByteBuffer.
 * The buffer contents are normally read-only, but its position etc. can be modified.
 * Delegates to the four-argument getData() with no class loader, no resource name,
 * and required=true.
 *
 * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu".
 * @return The data as a read-only ByteBuffer.
 * @throws MissingResourceException if the resource could not be found
 */
public static ByteBuffer getRequiredData(String itemPath) {
return getData(null, null, itemPath, true);
}
/**
* Loads an ICU binary data file and returns it as a ByteBuffer.
* The buffer contents is normally read-only, but its position etc. can be modified.
*
* @param loader Used for loader.getResourceAsStream() unless the data is found elsewhere.
* @param resourceName Resource name for use with the loader.
* @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu".
* @return The data as a read-only ByteBuffer.
* @throws MissingResourceException if required==true and the resource could not be found
*/
// public static ByteBuffer getRequiredData(ClassLoader loader, String resourceName,
// String itemPath) {
// return getData(loader, resourceName, itemPath, true);
// }
/**
 * Loads an ICU binary data file and returns it as a ByteBuffer.
 * The files found on the configured data path are searched first;
 * only if the item is not there is the class loader asked for the resource.
 * The buffer contents are normally read-only, but its position etc. can be modified.
 *
 * @param loader Used for loading the resource unless the data is found elsewhere;
 *               if null, the class loader of ICUData is used.
 * @param resourceName Resource name for use with the loader;
 *               if null, it is built from ICUData.ICU_BASE_NAME, '/' and itemPath.
 * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu".
 * @param required If the resource cannot be found,
 *    this method returns null (!required) or throws an exception (required).
 * @return The data as a read-only ByteBuffer,
 *    or null if required==false and the resource could not be found.
 * @throws MissingResourceException if required==true and the resource could not be found
 * @throws ICUUncheckedIOException if reading the resource stream fails
 */
private static ByteBuffer getData(ClassLoader loader, String resourceName,
        String itemPath, boolean required) {
    ByteBuffer fromFile = getDataFromFile(itemPath);
    if (fromFile != null) {
        return fromFile;
    }
    // Fall back to the class path, filling in defaults for omitted arguments.
    ClassLoader effectiveLoader =
            (loader != null) ? loader : ICUData.class.getClassLoader();
    String effectiveName =
            (resourceName != null) ? resourceName : ICUData.ICU_BASE_NAME + '/' + itemPath;
    InputStream stream = ICUData.getStream(effectiveLoader, effectiveName, required);
    if (stream == null) {
        return null;
    }
    try {
        return getByteBufferFromInputStream(stream);
    } catch (IOException e) {
        throw new ICUUncheckedIOException(e);
    }
}
/**
 * Searches the entries of icuDataFiles, in order, for the given item.
 * An entry that carries package bytes is queried through DatPackageReader;
 * an entry without package bytes matches only when its own item path equals itemPath,
 * in which case that file is memory-mapped.
 *
 * @param itemPath Relative ICU data item path.
 * @return The item data, or null if no entry supplies it
 *         (or if mapping the matching file failed).
 */
private static ByteBuffer getDataFromFile(String itemPath) {
    for (DataFile candidate : icuDataFiles) {
        if (candidate.pkgBytes == null) {
            // Single-item file: usable only for exactly this item path.
            if (itemPath.equals(candidate.itemPath)) {
                return mapFile(candidate.path);
            }
            continue;
        }
        // Package file: keep looking in later entries if the item is not inside.
        ByteBuffer item = DatPackageReader.getData(candidate.pkgBytes, itemPath);
        if (item != null) {
            return item;
        }
    }
    return null;
}
/**
 * Memory-maps the whole file read-only.
 * Failures are reported on System.err and result in a null return value.
 *
 * @param path The file to map.
 * @return A read-only mapping of the file contents, or null if the file
 *         could not be opened or mapped.
 */
private static ByteBuffer mapFile(File path) {
    FileInputStream file = null;
    try {
        file = new FileInputStream(path);
        FileChannel channel = file.getChannel();
        // Per the FileChannel.map() contract, an established mapping does not depend
        // on the channel that created it, so the file can be closed right away.
        return channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
    } catch (FileNotFoundException e) {
        System.err.println(e);
    } catch (IOException e) {
        System.err.println(e);
    } finally {
        // Always release the file handle, including when size() or map() failed;
        // previously the stream leaked on those paths.
        if (file != null) {
            try {
                file.close();
            } catch (IOException e) {
                // A failed close does not invalidate a mapping that was already created.
                System.err.println(e);
            }
        }
    }
    return null;
}
/**
* Same as readHeader(), but returns a VersionInfo rather than a compact int.
*/
public static final VersionInfo readHeaderAndDataVersion(ByteBuffer bytes,
public static VersionInfo readHeaderAndDataVersion(ByteBuffer bytes,
int dataFormat,
Authenticate authenticate)
throws IOException {
@ -56,7 +422,7 @@ public final class ICUBinary
* @return dataVersion
* @throws IOException if this is not a valid ICU data item of the expected dataFormat
*/
public static final int readHeader(ByteBuffer bytes, int dataFormat, Authenticate authenticate)
public static int readHeader(ByteBuffer bytes, int dataFormat, Authenticate authenticate)
throws IOException {
assert bytes.position() == 0;
byte magic1 = bytes.get(2);
@ -89,7 +455,11 @@ public final class ICUBinary
bytes.get(14) != (byte)(dataFormat >> 8) ||
bytes.get(15) != (byte)dataFormat ||
(authenticate != null && !authenticate.isDataVersionAcceptable(formatVersion))) {
throw new IOException(HEADER_AUTHENTICATION_FAILED_);
throw new IOException(HEADER_AUTHENTICATION_FAILED_ +
String.format("; data format %02x%02x%02x%02x, format version %d.%d.%d.%d",
bytes.get(12), bytes.get(13), bytes.get(14), bytes.get(15),
formatVersion[0] & 0xff, formatVersion[1] & 0xff,
formatVersion[2] & 0xff, formatVersion[3] & 0xff));
}
bytes.position(headerSize);
@ -100,17 +470,54 @@ public final class ICUBinary
(bytes.get(23) & 0xff);
}
public static final void skipBytes(ByteBuffer bytes, int skipLength) {
/**
 * Writes an ICU data header.
 * Does not write a copyright string.
 * The header is 32 bytes long and is marked as big-endian, matching the
 * byte order in which DataOutputStream writes multi-byte values.
 *
 * @param dataFormat The 4-byte data format identifier, packed into an int.
 * @param formatVersion The 4-byte format version, packed into an int.
 * @param dataVersion The 4-byte data version, packed into an int.
 * @param dos The stream to write to; it may already contain other output.
 * @return The length of the header (number of bytes written).
 * @throws IOException from the DataOutputStream
 */
public static int writeHeader(int dataFormat, int formatVersion, int dataVersion,
        DataOutputStream dos) throws IOException {
    // Remember how much was written before the header so that the size check
    // at the end also holds for a stream that is not empty on entry.
    final int startSize = dos.size();
    // ucmndata.h MappedData
    dos.writeChar(32);  // headerSize
    dos.writeByte(MAGIC1);
    dos.writeByte(MAGIC2);
    // unicode/udata.h UDataInfo
    dos.writeChar(20);  // sizeof(UDataInfo)
    dos.writeChar(0);  // reservedWord
    dos.writeByte(1);  // isBigEndian
    dos.writeByte(CHAR_SET_);  // charsetFamily
    dos.writeByte(CHAR_SIZE_);  // sizeofUChar
    dos.writeByte(0);  // reservedByte
    dos.writeInt(dataFormat);
    dos.writeInt(formatVersion);
    dos.writeInt(dataVersion);
    // 8 bytes padding for 32 bytes headerSize (multiple of 16).
    dos.writeLong(0);
    assert dos.size() - startSize == 32;
    return 32;
}
/**
 * Advances the buffer position by skipLength bytes.
 * A zero or negative skipLength leaves the buffer untouched.
 *
 * @param bytes The buffer whose position is moved forward.
 * @param skipLength The number of bytes to skip.
 */
public static void skipBytes(ByteBuffer bytes, int skipLength) {
    if (skipLength <= 0) {
        return;
    }
    final int target = bytes.position() + skipLength;
    bytes.position(target);
}
/**
 * Slices the buffer like ByteBuffer.slice() and then gives the slice
 * the byte order of the original buffer.
 *
 * @param bytes The buffer to slice; the slice starts at its current position.
 * @return The slice, with the same byte order as bytes.
 */
public static ByteBuffer sliceWithOrder(ByteBuffer bytes) {
    return bytes.slice().order(bytes.order());
}
/**
* Reads the entire contents from the stream into a byte array
* and wraps it into a ByteBuffer. Closes the InputStream at the end.
*/
public static final ByteBuffer getByteBufferFromInputStream(InputStream is) throws IOException {
public static ByteBuffer getByteBufferFromInputStream(InputStream is) throws IOException {
try {
int avail = is.available();
byte[] bytes = new byte[avail];
@ -128,7 +535,7 @@ public final class ICUBinary
}
}
private static final void readFully(InputStream is, byte[] bytes, int offset, int avail)
private static void readFully(InputStream is, byte[] bytes, int offset, int avail)
throws IOException {
while (avail > 0) {
int numRead = is.read(bytes, offset, avail);

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
* Copyright (C) 2004-2009, International Business Machines Corporation and *
* others. All Rights Reserved. *
* Copyright (C) 2004-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* Created on Feb 4, 2004
@ -9,22 +9,83 @@
*/
package com.ibm.icu.impl;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.MissingResourceException;
import java.util.logging.Logger;
import com.ibm.icu.util.VersionInfo;
/**
* Provides access to ICU data files as InputStreams. Implements security checking.
*/
public final class ICUData {
/*
* Return a URL to the ICU resource names resourceName. The
* resource name should either be an absolute path, or a path relative to
* com.ibm.icu.impl (e.g., most likely it is 'data/foo'). If required
* is true, throw an MissingResourceException instead of returning a null result.
/**
* The data path to be used with getBundleInstance API
*/
static final String ICU_DATA_PATH = "com/ibm/icu/impl/";
/**
* The ICU data package name.
* This is normally the name of the .dat package, and the prefix (plus '/')
* of the package entry names.
*/
static final String PACKAGE_NAME = "icudt" + VersionInfo.ICU_DATA_VERSION_PATH;
/**
* The data path to be used with Class.getResourceAsStream().
*/
public static final String ICU_BUNDLE = "data/" + PACKAGE_NAME;
/**
* The base name of ICU data to be used with ClassLoader.getResourceAsStream(),
* ICUResourceBundle.getBundleInstance() etc.
*/
public static final String ICU_BASE_NAME = ICU_DATA_PATH + ICU_BUNDLE;
/**
* The base name of collation data to be used with getBundleInstance API
*/
public static final String ICU_COLLATION_BASE_NAME = ICU_BASE_NAME + "/coll";
/**
* The base name of rbbi data to be used with getData API
*/
public static final String ICU_BRKITR_NAME = "brkitr";
/**
* The base name of rbbi data to be used with getBundleInstance API
*/
public static final String ICU_BRKITR_BASE_NAME = ICU_BASE_NAME + '/' + ICU_BRKITR_NAME;
/**
* The base name of rbnf data to be used with getBundleInstance API
*/
public static final String ICU_RBNF_BASE_NAME = ICU_BASE_NAME + "/rbnf";
/**
* The base name of transliterator data to be used with getBundleInstance API
*/
public static final String ICU_TRANSLIT_BASE_NAME = ICU_BASE_NAME + "/translit";
/**
* The base name of the data in the "lang" subdirectory of the ICU data (ICU_BASE_NAME + "/lang").
*/
public static final String ICU_LANG_BASE_NAME = ICU_BASE_NAME + "/lang";
/**
* The base name of the data in the "curr" subdirectory of the ICU data (ICU_BASE_NAME + "/curr").
*/
public static final String ICU_CURR_BASE_NAME = ICU_BASE_NAME + "/curr";
/**
* The base name of the data in the "region" subdirectory of the ICU data (ICU_BASE_NAME + "/region").
*/
public static final String ICU_REGION_BASE_NAME = ICU_BASE_NAME + "/region";
/**
* The base name of the data in the "zone" subdirectory of the ICU data (ICU_BASE_NAME + "/zone").
*/
public static final String ICU_ZONE_BASE_NAME = ICU_BASE_NAME + "/zone";
/**
* For testing (otherwise false): When reading an InputStream from a Class or ClassLoader
* (that is, not from a file), log when the stream contains ICU binary data.
*
* This cannot be ICUConfig'ured because ICUConfig calls ICUData.getStream()
* to read the properties file, so we would get a circular dependency
* in the class initialization.
*/
private static final boolean logBinaryDataFromInputStream = false;
private static final Logger logger = logBinaryDataFromInputStream ?
Logger.getLogger(ICUData.class.getName()) : null;
public static boolean exists(final String resourceName) {
URL i = null;
if (System.getSecurityManager() != null) {
@ -38,10 +99,9 @@ public final class ICUData {
}
return i != null;
}
private static InputStream getStream(final Class<?> root, final String resourceName, boolean required) {
InputStream i = null;
if (System.getSecurityManager() != null) {
i = AccessController.doPrivileged(new PrivilegedAction<InputStream>() {
public InputStream run() {
@ -55,10 +115,14 @@ public final class ICUData {
if (i == null && required) {
throw new MissingResourceException("could not locate data " +resourceName, root.getPackage().getName(), resourceName);
}
checkStreamForBinaryData(i, resourceName);
return i;
}
private static InputStream getStream(final ClassLoader loader, final String resourceName, boolean required) {
/**
* Should be called only from ICUBinary.getData() or from convenience overloads here.
*/
static InputStream getStream(final ClassLoader loader, final String resourceName, boolean required) {
InputStream i = null;
if (System.getSecurityManager() != null) {
i = AccessController.doPrivileged(new PrivilegedAction<InputStream>() {
@ -72,40 +136,67 @@ public final class ICUData {
if (i == null && required) {
throw new MissingResourceException("could not locate data", loader.toString(), resourceName);
}
checkStreamForBinaryData(i, resourceName);
return i;
}
/**
 * Test aid: when logBinaryDataFromInputStream is enabled, peeks at the first 32 bytes
 * of a stream whose resource name contains PACKAGE_NAME and logs a message if they
 * carry the ICU data header magic bytes (0xda 0x27 at offsets 2 and 3).
 * The stream is rewound afterwards so that the caller still sees all of the data.
 *
 * @param is The stream that is about to be returned to a caller; may be null.
 * @param resourceName The name under which the stream was opened.
 */
@SuppressWarnings("unused")  // used if logBinaryDataFromInputStream == true
private static void checkStreamForBinaryData(InputStream is, String resourceName) {
    // Only peek into streams that can be rewound: without mark/reset support the
    // 32 probe bytes would be consumed and the caller would get a truncated stream.
    if (logBinaryDataFromInputStream && is != null && is.markSupported() &&
            resourceName.indexOf(PACKAGE_NAME) >= 0) {
        try {
            is.mark(32);
            byte[] b = new byte[32];
            int len = is.read(b);
            if (len == 32 && b[2] == (byte)0xda && b[3] == 0x27) {
                String msg = String.format(
                        "ICU binary data file loaded from Class/ClassLoader as InputStream " +
                        "from %s: MappedData %02x%02x%02x%02x dataFormat %02x%02x%02x%02x",
                        resourceName,
                        b[0], b[1], b[2], b[3],
                        b[12], b[13], b[14], b[15]);
                logger.info(msg);
            }
            is.reset();
        } catch (IOException ignored) {
            // Best-effort diagnostics only: a failure here must not prevent
            // the caller from receiving the stream.
        }
    }
}
/**
 * Convenience overload that calls getStream(loader, resourceName, false).
 * Returns null if the resource could not be found.
 */
public static InputStream getStream(ClassLoader loader, String resourceName) {
    // A second, identical return statement used to follow this one; it was
    // unreachable and therefore a compile error.
    return getStream(loader, resourceName, false);
}
/**
* Convenience method that calls getStream(loader, resourceName, true).
* @param loader the class loader to pass to getStream()
* @param resourceName the resource name to pass to getStream()
* @return the stream returned by getStream()
* @throws MissingResourceException if the resource could not be found
*/
public static InputStream getRequiredStream(ClassLoader loader, String resourceName){
return getStream(loader, resourceName, true);
}
/*
/**
* Convenience overload that calls getStream(ICUData.class, resourceName, false).
* Returns null if the resource could not be found.
* @param resourceName the resource name to pass to getStream()
* @return the stream, or null if the resource could not be found
*/
public static InputStream getStream(String resourceName) {
return getStream(ICUData.class, resourceName, false);
}
/*
/**
* Convenience method that calls getStream(ICUData.class, resourceName, true).
* @param resourceName the resource name to pass to getStream()
* @return the stream returned by getStream()
* @throws MissingResourceException if the resource could not be found
*/
public static InputStream getRequiredStream(String resourceName) {
return getStream(ICUData.class, resourceName, true);
}
/*
/**
* Convenience overload that calls getStream(root, resourceName, false).
* Returns null if the resource could not be found.
* @param root the class to pass to getStream()
* @param resourceName the resource name to pass to getStream()
* @return the stream, or null if the resource could not be found
*/
public static InputStream getStream(Class<?> root, String resourceName) {
return getStream(root, resourceName, false);
}
/*
/**
* Convenience method that calls getStream(root, resourceName, true).
* @throws MissingResourceException if the resource could not be found
*/
public static InputStream getRequiredStream(Class<?> root, String resourceName) {
return getStream(root, resourceName, true);

View file

@ -29,52 +29,76 @@ import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.UResourceBundle;
import com.ibm.icu.util.UResourceBundleIterator;
import com.ibm.icu.util.UResourceTypeMismatchException;
import com.ibm.icu.util.VersionInfo;
public class ICUResourceBundle extends UResourceBundle {
/**
* The data path to be used with getBundleInstance API
* @deprecated because not specific to resource bundles; use the ICUData constants instead
*/
protected static final String ICU_DATA_PATH = "com/ibm/icu/impl/";
@Deprecated
protected static final String ICU_DATA_PATH = ICUData.ICU_DATA_PATH;
/**
* The data path to be used with getBundleInstance API
* @deprecated because not specific to resource bundles; use the ICUData constants instead
*/
public static final String ICU_BUNDLE = "data/icudt" + VersionInfo.ICU_DATA_VERSION_PATH;
@Deprecated
public static final String ICU_BUNDLE = ICUData.ICU_BUNDLE;
/**
* The base name of ICU data to be used with getBundleInstance API
* @deprecated because not specific to resource bundles; use the ICUData constants instead
*/
public static final String ICU_BASE_NAME = ICU_DATA_PATH + ICU_BUNDLE;
@Deprecated
public static final String ICU_BASE_NAME = ICUData.ICU_BASE_NAME;
/**
* The base name of collation data to be used with getBundleInstance API
* @deprecated because not specific to resource bundles; use the ICUData constants instead
*/
public static final String ICU_COLLATION_BASE_NAME = ICU_BASE_NAME + "/coll";
/**
* The base name of rbbi data to be used with getData API
*/
public static final String ICU_BRKITR_NAME = "/brkitr";
@Deprecated
public static final String ICU_COLLATION_BASE_NAME = ICUData.ICU_COLLATION_BASE_NAME;
/**
* The base name of rbbi data to be used with getBundleInstance API
* @deprecated because not specific to resource bundles; use the ICUData constants instead
*/
public static final String ICU_BRKITR_BASE_NAME = ICU_BASE_NAME + ICU_BRKITR_NAME;
@Deprecated
public static final String ICU_BRKITR_BASE_NAME = ICUData.ICU_BRKITR_BASE_NAME;
/**
* The base name of rbnf data to be used with getBundleInstance API
* @deprecated because not specific to resource bundles; use the ICUData constants instead
*/
public static final String ICU_RBNF_BASE_NAME = ICU_BASE_NAME + "/rbnf";
@Deprecated
public static final String ICU_RBNF_BASE_NAME = ICUData.ICU_RBNF_BASE_NAME;
/**
* The base name of transliterator data to be used with getBundleInstance API
* @deprecated because not specific to resource bundles; use the ICUData constants instead
*/
public static final String ICU_TRANSLIT_BASE_NAME = ICU_BASE_NAME + "/translit";
@Deprecated
public static final String ICU_TRANSLIT_BASE_NAME = ICUData.ICU_TRANSLIT_BASE_NAME;
public static final String ICU_LANG_BASE_NAME = ICU_BASE_NAME + "/lang";
public static final String ICU_CURR_BASE_NAME = ICU_BASE_NAME + "/curr";
public static final String ICU_REGION_BASE_NAME = ICU_BASE_NAME + "/region";
public static final String ICU_ZONE_BASE_NAME = ICU_BASE_NAME + "/zone";
/**
* @deprecated because not specific to resource bundles; use the ICUData constants instead
*/
@Deprecated
public static final String ICU_LANG_BASE_NAME = ICUData.ICU_LANG_BASE_NAME;
/**
* @deprecated because not specific to resource bundles; use the ICUData constants instead
*/
@Deprecated
public static final String ICU_CURR_BASE_NAME = ICUData.ICU_CURR_BASE_NAME;
/**
* @deprecated because not specific to resource bundles; use the ICUData constants instead
*/
@Deprecated
public static final String ICU_REGION_BASE_NAME = ICUData.ICU_REGION_BASE_NAME;
/**
* @deprecated because not specific to resource bundles; use the ICUData constants instead
*/
@Deprecated
public static final String ICU_ZONE_BASE_NAME = ICUData.ICU_ZONE_BASE_NAME;
private static final String NO_INHERITANCE_MARKER = "\u2205\u2205\u2205";

View file

@ -328,11 +328,25 @@ public final class ICUResourceBundleReader {
@Override
protected ICUResourceBundleReader createInstance(ReaderInfo key, ReaderInfo data) {
String fullName = ICUResourceBundleReader.getFullName(data.baseName, data.localeID);
InputStream stream = ICUData.getStream(data.loader, fullName);
if (stream == null) {
return NULL_READER;
try {
ByteBuffer inBytes;
if (data.baseName != null && data.baseName.startsWith(ICUData.ICU_BASE_NAME)) {
String itemPath = fullName.substring(ICUData.ICU_BASE_NAME.length() + 1);
inBytes = ICUBinary.getData(data.loader, fullName, itemPath);
if (inBytes == null) {
return NULL_READER;
}
} else {
InputStream stream = ICUData.getStream(data.loader, fullName);
if (stream == null) {
return NULL_READER;
}
inBytes = ICUBinary.getByteBufferFromInputStream(stream);
}
return new ICUResourceBundleReader(inBytes, data.baseName, data.localeID, data.loader);
} catch (IOException ex) {
throw new ICUUncheckedIOException("Data file " + fullName + " is corrupt - " + ex.getMessage(), ex);
}
return new ICUResourceBundleReader(stream, data.baseName, data.localeID, data.loader);
}
}
@ -342,14 +356,10 @@ public final class ICUResourceBundleReader {
private ICUResourceBundleReader() {
}
private ICUResourceBundleReader(InputStream stream, String baseName, String localeID, ClassLoader loader) {
try {
ByteBuffer inBytes = ICUBinary.getByteBufferFromInputStream(stream);
init(inBytes);
} catch (IOException ex) {
String fullName = ICUResourceBundleReader.getFullName(baseName, localeID);
throw new ICUUncheckedIOException("Data file " + fullName + " is corrupt - " + ex.getMessage(), ex);
}
private ICUResourceBundleReader(ByteBuffer inBytes,
String baseName, String localeID,
ClassLoader loader) throws IOException {
init(inBytes);
// set pool bundle keys if necessary
if (usesPoolBundle) {
@ -377,7 +387,7 @@ public final class ICUResourceBundleReader {
private void init(ByteBuffer inBytes) throws IOException {
dataVersion = ICUBinary.readHeader(inBytes, DATA_FORMAT, IS_ACCEPTABLE);
boolean isFormatVersion10 = inBytes.get(16) == 1 && inBytes.get(17) == 0;
bytes = inBytes.slice();
bytes = ICUBinary.sliceWithOrder(inBytes);
int dataLength = bytes.remaining();
if(DEBUG) System.out.println("The ByteBuffer is direct (memory-mapped): " + bytes.isDirect());
@ -420,7 +430,7 @@ public final class ICUResourceBundleReader {
if(_16BitTop > keysTop) {
int num16BitUnits = (_16BitTop - keysTop) * 2;
bytes.position(keysTop << 2);
b16BitUnits = bytes.slice().asCharBuffer();
b16BitUnits = bytes.asCharBuffer();
b16BitUnits.limit(num16BitUnits);
maxOffset |= num16BitUnits - 1;
} else {
@ -444,7 +454,7 @@ public final class ICUResourceBundleReader {
// unlike regular bundles' key strings for which indexes
// are based on the start of the bundle data.
bytes.position((1 + indexLength) << 2);
bytes = bytes.slice();
bytes = ICUBinary.sliceWithOrder(bytes);
} else {
localKeyLimit = getIndexesInt(URES_INDEX_KEYS_TOP) << 2;
}
@ -582,38 +592,18 @@ public final class ICUResourceBundleReader {
return makeKeyStringFromBytes(poolBundleKeys, keyOffset & 0x7fffffff);
}
}
/**
 * Compares a length-specified input key with a NUL-terminated key stored in a byte buffer.
 *
 * @param key The input key.
 * @param keyBytes The buffer that contains the table key.
 * @param keyOffset The absolute buffer index of the first byte of the table key.
 * @return 0 if the keys are equal; 1 if the table key is a proper prefix of the input key;
 *         -1 if the input key is a proper prefix of the table key;
 *         otherwise the difference between the first pair of differing units
 *         (input key char minus table key byte).
 */
private static int compareKeys(CharSequence key, ByteBuffer keyBytes, int keyOffset) {
    int index = 0;
    while (true) {
        final int tableUnit = keyBytes.get(keyOffset + index);
        final boolean keyExhausted = index == key.length();
        if (tableUnit == 0) {
            // End of the table key: equal if the input key ends here too,
            // otherwise the input key is longer and sorts after it.
            return keyExhausted ? 0 : 1;
        }
        if (keyExhausted) {
            return -1;  // The input key is shorter and sorts before the table key.
        }
        final int delta = (int)key.charAt(index) - tableUnit;
        if (delta != 0) {
            return delta;
        }
        ++index;
    }
}
private int compareKeys(CharSequence key, char keyOffset) {
if(keyOffset < localKeyLimit) {
return compareKeys(key, bytes, keyOffset);
return ICUBinary.compareKeys(key, bytes, keyOffset);
} else {
return compareKeys(key, poolBundleKeys, keyOffset - localKeyLimit);
return ICUBinary.compareKeys(key, poolBundleKeys, keyOffset - localKeyLimit);
}
}
private int compareKeys32(CharSequence key, int keyOffset) {
if(keyOffset >= 0) {
return compareKeys(key, bytes, keyOffset);
return ICUBinary.compareKeys(key, bytes, keyOffset);
} else {
return compareKeys(key, poolBundleKeys, keyOffset & 0x7fffffff);
return ICUBinary.compareKeys(key, poolBundleKeys, keyOffset & 0x7fffffff);
}
}
@ -743,7 +733,7 @@ public final class ICUResourceBundleReader {
offset += 4;
ByteBuffer result = bytes.duplicate();
result.position(offset).limit(offset + length);
result = result.slice();
result = ICUBinary.sliceWithOrder(result);
if(!result.isReadOnly()) {
result = result.asReadOnlyBuffer();
}

View file

@ -345,7 +345,7 @@ public final class Norm2AllModes {
protected Norm2AllModes createInstance(String key, ByteBuffer bytes) {
Normalizer2Impl impl;
if(bytes==null) {
impl=new Normalizer2Impl().load(ICUResourceBundle.ICU_BUNDLE+"/"+key+".nrm");
impl=new Normalizer2Impl().load(key+".nrm");
} else {
impl=new Normalizer2Impl().load(bytes);
}
@ -365,8 +365,7 @@ public final class Norm2AllModes {
private static final class Norm2AllModesSingleton {
private Norm2AllModesSingleton(String name) {
try {
Normalizer2Impl impl=new Normalizer2Impl().load(
ICUResourceBundle.ICU_BUNDLE+"/"+name+".nrm");
Normalizer2Impl impl=new Normalizer2Impl().load(name+".nrm");
allModes=new Norm2AllModes(impl);
} catch(RuntimeException e) {
exception=e;

View file

@ -494,11 +494,7 @@ public final class Normalizer2Impl {
}
}
public Normalizer2Impl load(String name) {
try {
return load(ICUBinary.getByteBufferFromInputStream(ICUData.getRequiredStream(name)));
} catch(IOException e) {
throw new ICUUncheckedIOException(e);
}
return load(ICUBinary.getRequiredData(name));
}
private void enumLcccRange(int start, int end, int norm16, UnicodeSet set) {

View file

@ -98,10 +98,12 @@ public abstract class Trie2 implements Iterable<Trie2.Range> {
header.signature = bytes.getInt();
switch (header.signature) {
case 0x54726932:
bytes.order(ByteOrder.BIG_ENDIAN);
// The buffer is already set to the trie data byte order.
break;
case 0x32697254:
bytes.order(ByteOrder.LITTLE_ENDIAN);
// Temporarily reverse the byte order.
boolean isBigEndian = outerByteOrder == ByteOrder.BIG_ENDIAN;
bytes.order(isBigEndian ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
header.signature = 0x54726932;
break;
default:

View file

@ -20,7 +20,6 @@
package com.ibm.icu.impl;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.Iterator;
@ -34,8 +33,7 @@ public final class UBiDiProps {
// port of ubidi_openProps()
private UBiDiProps() throws IOException{
InputStream is=ICUData.getStream(ICUResourceBundle.ICU_BUNDLE+"/"+DATA_FILE_NAME);
ByteBuffer bytes=ICUBinary.getByteBufferFromInputStream(is);
ByteBuffer bytes=ICUBinary.getData(DATA_FILE_NAME);
readData(bytes);
}

View file

@ -20,7 +20,6 @@
package com.ibm.icu.impl;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.Iterator;
@ -37,8 +36,7 @@ public final class UCaseProps {
// port of ucase_openProps()
private UCaseProps() throws IOException {
InputStream is=ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/"+DATA_FILE_NAME);
ByteBuffer bytes=ICUBinary.getByteBufferFromInputStream(is);
ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME);
readData(bytes);
}

View file

@ -8,7 +8,6 @@
package com.ibm.icu.impl;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.Locale;
import java.util.MissingResourceException;
@ -1039,7 +1038,7 @@ public final class UCharacterName
/**
* Default name of the name datafile
*/
private static final String NAME_FILE_NAME_ = ICUResourceBundle.ICU_BUNDLE+"/unames.icu";
private static final String FILE_NAME_ = "unames.icu";
/**
* Shift count to retrieve group information
*/
@ -1168,8 +1167,7 @@ public final class UCharacterName
*/
private UCharacterName() throws IOException
{
InputStream is = ICUData.getRequiredStream(NAME_FILE_NAME_);
ByteBuffer b = ICUBinary.getByteBufferFromInputStream(is);
ByteBuffer b = ICUBinary.getRequiredData(FILE_NAME_);
UCharacterNameReader reader = new UCharacterNameReader(b);
reader.read(this);
}

View file

@ -8,7 +8,6 @@
package com.ibm.icu.impl;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.MissingResourceException;
@ -970,7 +969,7 @@ public final class UCharacterProperty
/**
* Default name of the datafile
*/
private static final String DATA_FILE_NAME_ = ICUResourceBundle.ICU_BUNDLE+"/uprops.icu";
private static final String DATA_FILE_NAME_ = "uprops.icu";
/**
* Shift value for lead surrogate to form a supplementary character.
@ -1184,8 +1183,7 @@ public final class UCharacterProperty
}
// jar access
InputStream is = ICUData.getRequiredStream(DATA_FILE_NAME_);
ByteBuffer bytes=ICUBinary.getByteBufferFromInputStream(is);
ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME_);
m_unicodeVersion_ = ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, new IsAcceptable());
// Read or skip the 16 indexes.
int propertyOffset = bytes.getInt();

View file

@ -13,7 +13,6 @@
package com.ibm.icu.impl;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.MissingResourceException;
@ -116,8 +115,7 @@ public final class UPropertyAliases {
}
private UPropertyAliases() throws IOException {
InputStream stream = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/pnames.icu");
ByteBuffer bytes = ICUBinary.getByteBufferFromInputStream(stream);
ByteBuffer bytes = ICUBinary.getRequiredData("pnames.icu");
load(bytes);
}

View file

@ -1,7 +1,7 @@
/*
******************************************************************************
* Copyright (C) 2007-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
* Copyright (C) 2007-2014, International Business Machines Corporation and
* others. All Rights Reserved.
******************************************************************************
*/
@ -103,26 +103,21 @@ public class ResourceBasedPeriodFormatterDataService extends
if (ln != null) {
String name = PATH + "pfd_" + ln + ".xml";
try {
InputStream is = ICUData.getStream(getClass(), name);
if (is == null) {
throw new MissingResourceException(
"no resource named " + name, name, "");
} else {
DataRecord dr = DataRecord.read(ln,
new XMLRecordReader(new InputStreamReader(
is, "UTF-8")));
if (dr != null) {
// debug
// if (false && ln.equals("ar_EG")) {
// OutputStreamWriter osw = new
// OutputStreamWriter(System.out, "UTF-8");
// XMLRecordWriter xrw = new
// XMLRecordWriter(osw);
// dr.write(xrw);
// osw.flush();
// }
ld = new PeriodFormatterData(localeName, dr);
}
InputStream is = ICUData.getRequiredStream(getClass(), name);
DataRecord dr = DataRecord.read(ln,
new XMLRecordReader(new InputStreamReader(
is, "UTF-8")));
if (dr != null) {
// debug
// if (false && ln.equals("ar_EG")) {
// OutputStreamWriter osw = new
// OutputStreamWriter(System.out, "UTF-8");
// XMLRecordWriter xrw = new
// XMLRecordWriter(osw);
// dr.write(xrw);
// osw.flush();
// }
ld = new PeriodFormatterData(localeName, dr);
}
} catch (UnsupportedEncodingException e) {
throw new MissingResourceException(

View file

@ -8,7 +8,6 @@
package com.ibm.icu.text;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.Locale;
import java.util.MissingResourceException;
@ -111,9 +110,8 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim
try {
String typeKey = KIND_NAMES[kind];
String brkfname = rb.getStringWithFallback("boundaries/" + typeKey);
String rulesFileName = ICUResourceBundle.ICU_BUNDLE +ICUResourceBundle.ICU_BRKITR_NAME+ "/" + brkfname;
InputStream ruleStream = ICUData.getStream(rulesFileName);
bytes = ICUBinary.getByteBufferFromInputStream(ruleStream);
String rulesFileName = ICUData.ICU_BRKITR_NAME+ '/' + brkfname;
bytes = ICUBinary.getData(rulesFileName);
}
catch (Exception e) {
throw new MissingResourceException(e.toString(),"","");

View file

@ -8,7 +8,6 @@
package com.ibm.icu.text;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import com.ibm.icu.impl.Assert;
@ -45,9 +44,8 @@ final class DictionaryData {
public static DictionaryMatcher loadDictionaryFor(String dictType) throws IOException {
ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BRKITR_BASE_NAME);
String dictFileName = rb.getStringWithFallback("dictionaries/" + dictType);
dictFileName = ICUResourceBundle.ICU_BUNDLE +ICUResourceBundle.ICU_BRKITR_NAME+ "/" + dictFileName;
InputStream is = ICUData.getStream(dictFileName);
ByteBuffer bytes = ICUBinary.getByteBufferFromInputStream(is);
dictFileName = ICUData.ICU_BRKITR_NAME + '/' + dictFileName;
ByteBuffer bytes = ICUBinary.getRequiredData(dictFileName);
ICUBinary.readHeader(bytes, DATA_FORMAT_ID, null);
int[] indexes = new int[IX_COUNT];
// TODO: read indexes[IX_STRING_TRIE_OFFSET] first, then read a variable-length indexes[]

View file

@ -9,10 +9,12 @@ package com.ibm.icu.text;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import com.ibm.icu.impl.CharTrie;
import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.Trie;
import com.ibm.icu.impl.ICUBinary.Authenticate;
/**
* <p>Internal class used for Rule Based Break Iterators</p>
@ -32,7 +34,20 @@ final class RBBIDataWrapper {
CharTrie fTrie;
String fRuleSource;
int fStatusTable[];
private boolean isBigEndian;
static final int DATA_FORMAT = 0x42726b20; // "Brk "
static final int FORMAT_VERSION = 0x03010000; // 3.1
/**
 * Accepts data whose first format version byte equals the
 * most significant byte of FORMAT_VERSION.
 */
private static final class IsAcceptable implements Authenticate {
    // @Override when we switch to Java 6
    public boolean isDataVersionAcceptable(byte version[]) {
        final int expectedMajor = FORMAT_VERSION >>> 24;
        return expectedMajor == version[0];
    }
}
private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
//
// Indexes to fields in the ICU4C style binary form of the RBBI Data Header
// Used by the rule compiler when flattening the data.
@ -70,12 +85,12 @@ final class RBBIDataWrapper {
// Index offsets to header fields of a state table
// struct RBBIStateTable {... in the C version.
//
final static int NUMSTATES = 0;
final static int ROWLEN = 2;
final static int FLAGS = 4;
final static int RESERVED_2 = 6;
final static int ROW_DATA = 8;
static final int NUMSTATES = 0;
static final int ROWLEN = 2;
static final int FLAGS = 4;
//ivate static final int RESERVED_2 = 6;
private static final int ROW_DATA = 8;
// Bit selectors for the "FLAGS" field of the state table header
// enum RBBIStateTableFlags in the C version.
//
@ -153,18 +168,20 @@ final class RBBIDataWrapper {
RBBIDataWrapper This = new RBBIDataWrapper();
// Seek past the ICU data header.
// TODO: verify that the header looks good.
ICUBinary.skipBytes(bytes, 0x80);
ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
This.isBigEndian = bytes.order() == ByteOrder.BIG_ENDIAN;
// Read in the RBBI data header...
This.fHeader = new RBBIDataHeader();
This.fHeader.fMagic = bytes.getInt();
This.fHeader.fVersion = bytes.getInt();
This.fHeader.fFormatVersion[0] = (byte) (This.fHeader.fVersion >> 24);
This.fHeader.fFormatVersion[1] = (byte) (This.fHeader.fVersion >> 16);
This.fHeader.fFormatVersion[2] = (byte) (This.fHeader.fVersion >> 8);
This.fHeader.fFormatVersion[3] = (byte) (This.fHeader.fVersion);
// Read the same 4 bytes as an int and as a byte array: The data format could be
// the old fVersion=1 (TODO: probably not with a real ICU data header?)
// or the new fFormatVersion=3.x.
This.fHeader.fVersion = bytes.getInt(bytes.position());
This.fHeader.fFormatVersion[0] = bytes.get();
This.fHeader.fFormatVersion[1] = bytes.get();
This.fHeader.fFormatVersion[2] = bytes.get();
This.fHeader.fFormatVersion[3] = bytes.get();
This.fHeader.fLength = bytes.getInt();
This.fHeader.fCatCount = bytes.getInt();
This.fHeader.fFTable = bytes.getInt();
@ -322,14 +339,20 @@ final class RBBIDataWrapper {
///CLOVER:OFF
// Getters for fields from the state table header
//
final static int getNumStates(short table[]) {
int hi = table[NUMSTATES];
int lo = table[NUMSTATES+1];
int val = (hi<<16) + (lo&0x0000ffff);
return val;
/**
 * Reads the 32-bit number of states from the state table header.
 * The value occupies the two 16-bit slots at NUMSTATES and NUMSTATES+1;
 * which of the two holds the upper half depends on isBigEndian.
 *
 * @param table The state table.
 * @return The number of states.
 */
private int getStateTableNumStates(short table[]) {
    final int highIndex;
    final int lowIndex;
    if (isBigEndian) {
        highIndex = NUMSTATES;
        lowIndex = NUMSTATES + 1;
    } else {
        highIndex = NUMSTATES + 1;
        lowIndex = NUMSTATES;
    }
    return (table[highIndex] << 16) | (table[lowIndex] & 0xffff);
}
///CLOVER:ON
/**
 * Reads the flags from the state table header.
 * Only one of the two 16-bit slots at FLAGS and FLAGS+1 is read;
 * isBigEndian selects which one.
 *
 * @param table The state table.
 * @return The flags.
 */
int getStateTableFlags(short table[]) {
    int flagsIndex = FLAGS;
    if (isBigEndian) {
        flagsIndex = FLAGS + 1;
    }
    // This works for up to 15 flags bits.
    return table[flagsIndex];
}
///CLOVER:OFF
/* Debug function to display the break iterator data. */
void dump() {
@ -395,7 +418,7 @@ final class RBBIDataWrapper {
System.out.print("-");
}
System.out.println();
for (state=0; state< getNumStates(table); state++) {
for (state=0; state< getStateTableNumStates(table); state++) {
dumpRow(table, state);
}
System.out.println();

View file

@ -1,5 +1,5 @@
//
// Copyright (C) 2002-2009, International Business Machines Corporation and others.
// Copyright (C) 2002-2014, International Business Machines Corporation and others.
// All Rights Reserved.
//
//
@ -16,6 +16,7 @@ import java.util.Map;
import java.util.Set;
import com.ibm.icu.impl.Assert;
import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.ICUDebug;
class RBBIRuleBuilder {
@ -185,12 +186,8 @@ class RBBIRuleBuilder {
//
// Write out an ICU Data Header
// TODO: actually create a real header, rather than just a placeholder.
// The empty placeholder is ok for compile-and-go from within ICU4J.
// Replicating the ICU4C genbrk tool for building .brk resources would need a real header.
//
byte[] ICUDataHeader = new byte[0x80];
dos.write(ICUDataHeader);
ICUBinary.writeHeader(RBBIDataWrapper.DATA_FORMAT, RBBIDataWrapper.FORMAT_VERSION, 0, dos);
//
// Write out the RBBIDataHeader

View file

@ -1200,7 +1200,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
int state = START_STATE;
int row = fRData.getRowIndex(state);
short category = 3;
short flagsState = stateTable[RBBIDataWrapper.FLAGS+1];
int flagsState = fRData.getStateTableFlags(stateTable);
int mode = RBBI_RUN;
if ((flagsState & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
category = 2;
@ -1373,7 +1373,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
int initialPosition = 0;
int lookaheadResult = 0;
boolean lookAheadHardBreak =
(stateTable[RBBIDataWrapper.FLAGS+1] & RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK) != 0;
(fRData.getStateTableFlags(stateTable) & RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK) != 0;
// handlePrevious() never gets the rule status.
// Flag the status as invalid; if the user ever asks for status, we will need
@ -1392,7 +1392,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
row = fRData.getRowIndex(state);
category = 3; // TODO: obsolete? from the old start/run mode scheme?
mode = RBBI_RUN;
if ((stateTable[RBBIDataWrapper.FLAGS+1] & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
if ((fRData.getStateTableFlags(stateTable) & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
category = 2;
mode = RBBI_START;
}

View file

@ -11,7 +11,6 @@ package com.ibm.icu.text;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.LineNumberReader;
import java.io.Reader;
import java.nio.ByteBuffer;
@ -32,6 +31,7 @@ import java.util.regex.Pattern;
import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.Trie2;
import com.ibm.icu.impl.Trie2Writable;
import com.ibm.icu.impl.ICUBinary.Authenticate;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UCharacterCategory;
import com.ibm.icu.lang.UProperty;
@ -2172,24 +2172,32 @@ public class SpoofChecker {
}
}
private static final int DATA_FORMAT = 0x43667520; // "Cfu "
// Version check handed to ICUBinary.readHeader(): the data is accepted only
// when the first byte of its version array equals 1.
private static final class IsAcceptable implements Authenticate {
    // @Override when we switch to Java 6
    public boolean isDataVersionAcceptable(byte version[]) {
        // Only the leading version byte is inspected (presumably the major
        // format version -- confirm against the .cfu format description).
        final byte leadingVersionByte = version[0];
        return leadingVersionByte == 1;
    }
}
private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
// getDefault() - Create a SpoofData instance that is built from
// the data baked into the default ICU data.
// Holder for the SpoofData built from the "confusables.cfu" ICU data.
// INSTANCE is set once, when this nested class is initialized, and is left
// null if reading the data throws an IOException.
private static final class DefaultData {
    private static SpoofData INSTANCE = load();

    // Builds the SpoofData from the required data; returns null on IOException.
    private static SpoofData load() {
        try {
            return new SpoofData(ICUBinary.getRequiredData("confusables.cfu"));
        } catch (IOException ignored) {
            // Deliberately swallowed: a failed load is reported to callers
            // as a null INSTANCE rather than as an exception.
            return null;
        }
    }
}
/**
* @return instance for Unicode standard data
*/
static SpoofData getDefault() {
// TODO: Cache it. Lazy create, keep until cleanup.
SpoofData This = null;
try {
InputStream is = com.ibm.icu.impl.ICUData.getRequiredStream(com.ibm.icu.impl.ICUResourceBundle.ICU_BUNDLE
+ "/confusables.cfu");
This = new SpoofData(ICUBinary.getByteBufferFromInputStream(is));
is.close();
}
catch (IOException e) {
// Return null in this case.
}
return This;
return DefaultData.INSTANCE;
}
// SpoofChecker Data constructor for use from data builder.
@ -2200,9 +2208,7 @@ public class SpoofChecker {
// Constructor for use when creating from prebuilt default data.
// A ByteBuffer is what the ICU internal data loading functions provide.
SpoofData(ByteBuffer bytes) throws java.io.IOException {
// Seek past the ICU data header.
// TODO: verify that the header looks good.
ICUBinary.skipBytes(bytes, 0x80);
ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
bytes.mark();
readData(bytes);
}

View file

@ -14,8 +14,6 @@ import java.nio.ByteBuffer;
import com.ibm.icu.impl.CharTrie;
import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.StringPrepDataReader;
import com.ibm.icu.impl.UBiDiProps;
import com.ibm.icu.lang.UCharacter;
@ -272,7 +270,10 @@ public final class StringPrep {
*/
public StringPrep(InputStream inputStream) throws IOException{
// TODO: Add a public constructor that takes ByteBuffer directly.
ByteBuffer bytes = ICUBinary.getByteBufferFromInputStream(inputStream);
this(ICUBinary.getByteBufferFromInputStream(inputStream));
}
private StringPrep(ByteBuffer bytes) throws IOException {
StringPrepDataReader reader = new StringPrepDataReader(bytes);
// read the indexes
@ -328,15 +329,10 @@ public final class StringPrep {
}
if (instance == null) {
InputStream stream = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/"
+ PROFILE_NAMES[profile] + ".spp");
if (stream != null) {
ByteBuffer bytes = ICUBinary.getRequiredData(PROFILE_NAMES[profile] + ".spp");
if (bytes != null) {
try {
try {
instance = new StringPrep(stream);
} finally {
stream.close();
}
instance = new StringPrep(bytes);
} catch (IOException e) {
throw new ICUUncheckedIOException(e);
}

View file

@ -1,9 +1,7 @@
/*
*******************************************************************************
* Copyright (C) 2002-2012, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* Copyright (C) 2002-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
@ -216,7 +214,6 @@ public class TestConversion extends ModuleTest {
private void FromUnicodeCase(ConversionCase cc) {
// create charset encoder for conversion test
CharsetProviderICU provider = new CharsetProviderICU();
CharsetEncoder encoder = null;
@ -227,17 +224,21 @@ public class TestConversion extends ModuleTest {
? (Charset) provider.charsetForName(cc.charset.substring(1),
"com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader())
: (Charset) provider.charsetForName(cc.charset);
encoder = (CharsetEncoder) charset.newEncoder();
encoder.onMalformedInput(CodingErrorAction.REPLACE);
encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
if (encoder instanceof CharsetEncoderICU) {
((CharsetEncoderICU)encoder).setFallbackUsed(cc.fallbacks);
if (((CharsetEncoderICU)encoder).isFallbackUsed() != cc.fallbacks) {
errln("Fallback could not be set for " + cc.charset);
if (charset != null) {
encoder = (CharsetEncoder) charset.newEncoder();
encoder.onMalformedInput(CodingErrorAction.REPLACE);
encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
if (encoder instanceof CharsetEncoderICU) {
((CharsetEncoderICU)encoder).setFallbackUsed(cc.fallbacks);
if (((CharsetEncoderICU)encoder).isFallbackUsed() != cc.fallbacks) {
errln("Fallback could not be set for " + cc.charset);
}
}
}
} catch (Exception e) {
encoder = null;
}
if (encoder == null) {
if (cc.charset.charAt(0) == UNSUPPORTED_CHARSET_SYMBOL) {
logln("Skipping test:(" + cc.charset.substring(1) + ") due to ICU Charset not supported at this time");
} else {
@ -245,7 +246,7 @@ public class TestConversion extends ModuleTest {
}
return;
}
// set the callback for the encoder
if (cc.cbErrorAction != null) {
if (cc.cbEncoder != null) {
@ -514,12 +515,16 @@ public class TestConversion extends ModuleTest {
? (Charset) provider.charsetForName(cc.charset.substring(1),
"com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader())
: (Charset) provider.charsetForName(cc.charset);
decoder = (CharsetDecoder) charset.newDecoder();
decoder.onMalformedInput(CodingErrorAction.REPLACE);
decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
if (charset != null) {
decoder = (CharsetDecoder) charset.newDecoder();
decoder.onMalformedInput(CodingErrorAction.REPLACE);
decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
}
} catch (Exception e) {
// TODO implement loading of test data.
decoder = null;
}
if (decoder == null) {
if (cc.charset.charAt(0) == UNSUPPORTED_CHARSET_SYMBOL) {
logln("Skipping test:(" + cc.charset.substring(1) + ") due to ICU Charset not supported at this time");
} else {
@ -899,12 +904,12 @@ public class TestConversion extends ModuleTest {
//checking for converters that are not supported at this point
try{
if(charset.name()=="BOCU-1" ||charset.name()== "SCSU"|| charset.name()=="lmbcs1" || charset.name()== "lmbcs2" ||
if(charset==null ||
charset.name()=="BOCU-1" ||charset.name()== "SCSU"|| charset.name()=="lmbcs1" || charset.name()== "lmbcs2" ||
charset.name()== "lmbcs3" || charset.name()== "lmbcs4" || charset.name()=="lmbcs5" || charset.name()=="lmbcs6" ||
charset.name()== "lmbcs8" || charset.name()=="lmbcs11" || charset.name()=="lmbcs16" || charset.name()=="lmbcs17" ||
charset.name()=="lmbcs18"|| charset.name()=="lmbcs19"){
logln("Converter not supported at this point :" +charset.displayName());
logln("Converter not supported at this point :" + cc.charset);
return;
}