mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-8105 new data structure for Unicode property names data (pnames.icu formatVersion 2); includes new dictionary-type tries (only ByteTrie runtime for now, see ticket #8167); merge branches/markus/pnames2 -r 29097:29250
X-SVN-Rev: 29253
This commit is contained in:
parent
c04082d93c
commit
beb1e5718e
4 changed files with 864 additions and 533 deletions
638
icu4j/main/classes/core/src/com/ibm/icu/impl/ByteTrie.java
Normal file
638
icu4j/main/classes/core/src/com/ibm/icu/impl/ByteTrie.java
Normal file
|
@ -0,0 +1,638 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* created on: 2010nov23
|
||||
* created by: Markus W. Scherer
|
||||
* ported from ICU4C bytetrie.h/.cpp
|
||||
*/
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Light-weight, non-const reader class for a ByteTrie.
|
||||
* Traverses a byte-serialized data structure with minimal state,
|
||||
* for mapping byte sequences to non-negative integer values.
|
||||
*
|
||||
* @author Markus W. Scherer
|
||||
*/
|
||||
public final class ByteTrie implements Cloneable {
|
||||
/**
 * Creates a trie reader over serialized trie data.
 * The array is referenced, not copied; the reader starts at the root node
 * and is not inside a linear-match node.
 *
 * @param trieBytes array holding the serialized trie
 * @param offset index in trieBytes of the trie's root node
 */
public ByteTrie(byte[] trieBytes, int offset) {
    bytes_ = trieBytes;
    root_ = offset;
    pos_ = offset;
    remainingMatchLength_ = -1;
}
|
||||
|
||||
/**
|
||||
* Clones this trie reader object and its state,
|
||||
* but not the byte array which will be shared.
|
||||
* @return A shallow clone of this trie.
|
||||
*/
|
||||
@Override
|
||||
public Object clone() throws CloneNotSupportedException {
|
||||
return super.clone(); // A shallow copy is just what we need.
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets this trie to its initial state.
|
||||
*/
|
||||
public ByteTrie reset() {
|
||||
pos_=root_;
|
||||
remainingMatchLength_=-1;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* ByteTrie state object, for saving a trie's current state
|
||||
* and resetting the trie back to this state later.
|
||||
*/
|
||||
public static final class State {
|
||||
public State() {}
|
||||
private byte[] bytes;
|
||||
private int pos;
|
||||
private int remainingMatchLength;
|
||||
};
|
||||
|
||||
/**
|
||||
* Saves the state of this trie.
|
||||
* @see #resetToState
|
||||
*/
|
||||
public ByteTrie saveState(State state) /*const*/ {
|
||||
state.bytes=bytes_;
|
||||
state.pos=pos_;
|
||||
state.remainingMatchLength=remainingMatchLength_;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets this trie to the saved state.
|
||||
* @throws IllegalArgumentException if the state object contains no state,
|
||||
* or the state of a different trie
|
||||
* @see #saveState
|
||||
* @see #reset
|
||||
*/
|
||||
public ByteTrie resetToState(State state) {
|
||||
if(bytes_==state.bytes && bytes_!=null) {
|
||||
pos_=state.pos;
|
||||
remainingMatchLength_=state.remainingMatchLength;
|
||||
} else {
|
||||
throw new IllegalArgumentException("incompatible trie state");
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Return values for ByteTrie.next(), UCharTrie.next() and similar methods.
 */
public enum Result {
    /**
     * The input unit(s) did not continue a matching string.
     */
    NO_MATCH,
    /**
     * The input unit(s) continued a matching string
     * but there is no value for the string so far.
     * (It is a prefix of a longer string.)
     */
    NO_VALUE,
    /**
     * The input unit(s) continued a matching string
     * and there is a value for the string so far.
     * This value will be returned by getValue().
     * No further input byte/unit can continue a matching string.
     */
    HAS_FINAL_VALUE,
    /**
     * The input unit(s) continued a matching string
     * and there is a value for the string so far.
     * This value will be returned by getValue().
     * Another input byte/unit can continue a matching string.
     */
    HAS_VALUE;

    // The predicates compare against the constants directly rather than
    // relying on ordinal() arithmetic, so they stay correct even if the
    // declaration order of the constants ever changes.

    /**
     * Same as (result!=NO_MATCH).
     * @return true if the input bytes/units so far are part of a matching string/byte sequence.
     */
    public boolean matches() {
        return this != NO_MATCH;
    }

    /**
     * Equivalent to (result==HAS_VALUE || result==HAS_FINAL_VALUE).
     * @return true if there is a value for the input bytes/units so far.
     * @see #getValue
     */
    public boolean hasValue() {
        return this == HAS_VALUE || this == HAS_FINAL_VALUE;
    }

    /**
     * Equivalent to (result==NO_VALUE || result==HAS_VALUE).
     * @return true if another input byte/unit can continue a matching string.
     */
    public boolean hasNext() {
        return this == NO_VALUE || this == HAS_VALUE;
    }
}
|
||||
|
||||
/**
|
||||
* Determines whether the byte sequence so far matches, whether it has a value,
|
||||
* and whether another input byte can continue a matching byte sequence.
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
public Result current() /*const*/ {
|
||||
int pos=pos_;
|
||||
if(pos<0) {
|
||||
return Result.NO_MATCH;
|
||||
} else {
|
||||
int node;
|
||||
return (remainingMatchLength_<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
|
||||
valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Traverses the trie from the initial state for this input byte.
|
||||
* Equivalent to reset().next(inByte).
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
public Result first(int inByte) {
|
||||
remainingMatchLength_=-1;
|
||||
return nextImpl(root_, inByte);
|
||||
}
|
||||
|
||||
/**
|
||||
* Traverses the trie from the current state for this input byte.
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
public Result next(int inByte) {
|
||||
int pos=pos_;
|
||||
if(pos<0) {
|
||||
return Result.NO_MATCH;
|
||||
}
|
||||
int length=remainingMatchLength_; // Actual remaining match length minus 1.
|
||||
if(length>=0) {
|
||||
// Remaining part of a linear-match node.
|
||||
if(inByte==(bytes_[pos++]&0xff)) {
|
||||
remainingMatchLength_=--length;
|
||||
pos_=pos;
|
||||
int node;
|
||||
return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
|
||||
valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
|
||||
} else {
|
||||
stop();
|
||||
return Result.NO_MATCH;
|
||||
}
|
||||
}
|
||||
return nextImpl(pos, inByte);
|
||||
}
|
||||
|
||||
/**
|
||||
* Traverses the trie from the current state for this byte sequence.
|
||||
* Equivalent to
|
||||
* <pre>
|
||||
* Result result=current();
|
||||
* for(each c in s)
|
||||
* if((result=next(c))==Result.NO_MATCH) return Result.NO_MATCH;
|
||||
* return result;
|
||||
* </pre>
|
||||
* @return The match/value Result.
|
||||
*/
|
||||
// public Result next(const char *s, int length);
|
||||
|
||||
/**
|
||||
* Returns a matching byte sequence's value if called immediately after
|
||||
* current()/first()/next() returned Result.HAS_VALUE or Result.HAS_FINAL_VALUE.
|
||||
* getValue() can be called multiple times.
|
||||
*
|
||||
* Do not call getValue() after Result.NO_MATCH or Result.NO_VALUE!
|
||||
*/
|
||||
public int getValue() /*const*/ {
|
||||
int pos=pos_;
|
||||
int leadByte=bytes_[pos++]&0xff;
|
||||
assert(leadByte>=kMinValueLead);
|
||||
return readValue(bytes_, pos, leadByte>>1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether all byte sequences reachable from the current state
|
||||
* map to the same value, and if so, returns that value.
|
||||
* @return the unique value in bits 32..1 with bit 0 set,
|
||||
* if all byte sequences reachable from the current state
|
||||
* map to the same value; otherwise returns 0.
|
||||
*/
|
||||
public long getUniqueValue() /*const*/ {
|
||||
int pos=pos_;
|
||||
if(pos<0) {
|
||||
return 0;
|
||||
}
|
||||
// Skip the rest of a pending linear-match node.
|
||||
long uniqueValue=findUniqueValue(bytes_, pos+remainingMatchLength_+1, 0);
|
||||
// Ignore internally used bits 63..33; extend the actual value's sign bit from bit 32.
|
||||
return (uniqueValue<<31)>>31;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds each byte which continues the byte sequence from the current state.
|
||||
* That is, each byte b for which it would be next(b)!=Result.NO_MATCH now.
|
||||
* @param out Each next byte is 0-extended to a char and appended to this object.
|
||||
* (Only uses the out.append(c) method.)
|
||||
* @return the number of bytes which continue the byte sequence from here
|
||||
*/
|
||||
public int getNextBytes(Appendable out) /*const*/ {
|
||||
int pos=pos_;
|
||||
if(pos<0) {
|
||||
return 0;
|
||||
}
|
||||
if(remainingMatchLength_>=0) {
|
||||
append(out, bytes_[pos]&0xff); // Next byte of a pending linear-match node.
|
||||
return 1;
|
||||
}
|
||||
int node=bytes_[pos++]&0xff;
|
||||
if(node>=kMinValueLead) {
|
||||
if((node&kValueIsFinal)!=0) {
|
||||
return 0;
|
||||
} else {
|
||||
pos=skipValue(pos, node);
|
||||
node=bytes_[pos++]&0xff;
|
||||
assert(node<kMinValueLead);
|
||||
}
|
||||
}
|
||||
if(node<kMinLinearMatch) {
|
||||
if(node==0) {
|
||||
node=bytes_[pos++]&0xff;
|
||||
}
|
||||
getNextBranchBytes(bytes_, pos, ++node, out);
|
||||
return node;
|
||||
} else {
|
||||
// First byte of the linear-match node.
|
||||
append(out, bytes_[pos]&0xff);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
private void stop() {
|
||||
pos_=-1;
|
||||
}
|
||||
|
||||
// Reads a compact 32-bit integer.
|
||||
// pos is already after the leadByte, and the lead byte is already shifted right by 1.
|
||||
private static int readValue(byte[] bytes, int pos, int leadByte) {
|
||||
int value;
|
||||
if(leadByte<kMinTwoByteValueLead) {
|
||||
value=leadByte-kMinOneByteValueLead;
|
||||
} else if(leadByte<kMinThreeByteValueLead) {
|
||||
value=((leadByte-kMinTwoByteValueLead)<<8)|(bytes[pos]&0xff);
|
||||
} else if(leadByte<kFourByteValueLead) {
|
||||
value=((leadByte-kMinThreeByteValueLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff);
|
||||
} else if(leadByte==kFourByteValueLead) {
|
||||
value=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff);
|
||||
} else {
|
||||
value=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
private static int skipValue(int pos, int leadByte) {
|
||||
assert(leadByte>=kMinValueLead);
|
||||
if(leadByte>=(kMinTwoByteValueLead<<1)) {
|
||||
if(leadByte<(kMinThreeByteValueLead<<1)) {
|
||||
++pos;
|
||||
} else if(leadByte<(kFourByteValueLead<<1)) {
|
||||
pos+=2;
|
||||
} else {
|
||||
pos+=3+((leadByte>>1)&1);
|
||||
}
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
private static int skipValue(byte[] bytes, int pos) {
|
||||
int leadByte=bytes[pos++]&0xff;
|
||||
return skipValue(pos, leadByte);
|
||||
}
|
||||
|
||||
// Reads a jump delta and jumps.
|
||||
private static int jumpByDelta(byte[] bytes, int pos) {
|
||||
int delta=bytes[pos++]&0xff;
|
||||
if(delta<kMinTwoByteDeltaLead) {
|
||||
// nothing to do
|
||||
} else if(delta<kMinThreeByteDeltaLead) {
|
||||
delta=((delta-kMinTwoByteDeltaLead)<<8)|(bytes[pos++]&0xff);
|
||||
} else if(delta<kFourByteDeltaLead) {
|
||||
delta=((delta-kMinThreeByteDeltaLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff);
|
||||
pos+=2;
|
||||
} else if(delta==kFourByteDeltaLead) {
|
||||
delta=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff);
|
||||
pos+=3;
|
||||
} else {
|
||||
delta=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff);
|
||||
pos+=4;
|
||||
}
|
||||
return pos+delta;
|
||||
}
|
||||
|
||||
private static int skipDelta(byte[] bytes, int pos) {
|
||||
int delta=bytes[pos++]&0xff;
|
||||
if(delta>=kMinTwoByteDeltaLead) {
|
||||
if(delta<kMinThreeByteDeltaLead) {
|
||||
++pos;
|
||||
} else if(delta<kFourByteDeltaLead) {
|
||||
pos+=2;
|
||||
} else {
|
||||
pos+=3+(delta&1);
|
||||
}
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
// Maps a value node's "final" bit (node&kValueIsFinal) to the corresponding Result.
// final: this shared lookup table is never meant to be replaced.
private static final Result[] valueResults_={ Result.HAS_VALUE, Result.HAS_FINAL_VALUE };
|
||||
|
||||
// Handles a branch node for both next(byte) and next(string).
|
||||
private Result branchNext(int pos, int length, int inByte) {
|
||||
// Branch according to the current byte.
|
||||
if(length==0) {
|
||||
length=bytes_[pos++]&0xff;
|
||||
}
|
||||
++length;
|
||||
// The length of the branch is the number of bytes to select from.
|
||||
// The data structure encodes a binary search.
|
||||
while(length>kMaxBranchLinearSubNodeLength) {
|
||||
if(inByte<(bytes_[pos++]&0xff)) {
|
||||
length>>=1;
|
||||
pos=jumpByDelta(bytes_, pos);
|
||||
} else {
|
||||
length=length-(length>>1);
|
||||
pos=skipDelta(bytes_, pos);
|
||||
}
|
||||
}
|
||||
// Drop down to linear search for the last few bytes.
|
||||
// length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
|
||||
// and divides length by 2.
|
||||
do {
|
||||
if(inByte==(bytes_[pos++]&0xff)) {
|
||||
Result result;
|
||||
int node=bytes_[pos]&0xff;
|
||||
assert(node>=kMinValueLead);
|
||||
if((node&kValueIsFinal)!=0) {
|
||||
// Leave the final value for getValue() to read.
|
||||
result=Result.HAS_FINAL_VALUE;
|
||||
} else {
|
||||
// Use the non-final value as the jump delta.
|
||||
++pos;
|
||||
// int delta=readValue(pos, node>>1);
|
||||
node>>=1;
|
||||
int delta;
|
||||
if(node<kMinTwoByteValueLead) {
|
||||
delta=node-kMinOneByteValueLead;
|
||||
} else if(node<kMinThreeByteValueLead) {
|
||||
delta=((node-kMinTwoByteValueLead)<<8)|(bytes_[pos++]&0xff);
|
||||
} else if(node<kFourByteValueLead) {
|
||||
delta=((node-kMinThreeByteValueLead)<<16)|((bytes_[pos]&0xff)<<8)|(bytes_[pos+1]&0xff);
|
||||
pos+=2;
|
||||
} else if(node==kFourByteValueLead) {
|
||||
delta=((bytes_[pos]&0xff)<<16)|((bytes_[pos+1]&0xff)<<8)|(bytes_[pos+2]&0xff);
|
||||
pos+=3;
|
||||
} else {
|
||||
delta=(bytes_[pos]<<24)|((bytes_[pos+1]&0xff)<<16)|((bytes_[pos+2]&0xff)<<8)|(bytes_[pos+3]&0xff);
|
||||
pos+=4;
|
||||
}
|
||||
// end readValue()
|
||||
pos+=delta;
|
||||
node=bytes_[pos]&0xff;
|
||||
result= node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
|
||||
}
|
||||
pos_=pos;
|
||||
return result;
|
||||
}
|
||||
--length;
|
||||
pos=skipValue(bytes_, pos);
|
||||
} while(length>1);
|
||||
if(inByte==(bytes_[pos++]&0xff)) {
|
||||
pos_=pos;
|
||||
int node=bytes_[pos]&0xff;
|
||||
return node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
|
||||
} else {
|
||||
stop();
|
||||
return Result.NO_MATCH;
|
||||
}
|
||||
}
|
||||
|
||||
// Requires remainingLength_<0.
|
||||
private Result nextImpl(int pos, int inByte) {
|
||||
for(;;) {
|
||||
int node=bytes_[pos++]&0xff;
|
||||
if(node<kMinLinearMatch) {
|
||||
return branchNext(pos, node, inByte);
|
||||
} else if(node<kMinValueLead) {
|
||||
// Match the first of length+1 bytes.
|
||||
int length=node-kMinLinearMatch; // Actual match length minus 1.
|
||||
if(inByte==(bytes_[pos++]&0xff)) {
|
||||
remainingMatchLength_=--length;
|
||||
pos_=pos;
|
||||
return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
|
||||
valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
|
||||
} else {
|
||||
// No match.
|
||||
break;
|
||||
}
|
||||
} else if((node&kValueIsFinal)!=0) {
|
||||
// No further matching bytes.
|
||||
break;
|
||||
} else {
|
||||
// Skip intermediate value.
|
||||
pos=skipValue(pos, node);
|
||||
// The next node must not also be a value node.
|
||||
assert((bytes_[pos]&0xff)<kMinValueLead);
|
||||
}
|
||||
}
|
||||
stop();
|
||||
return Result.NO_MATCH;
|
||||
}
|
||||
|
||||
// Helper functions for hasUniqueValue().
|
||||
// Recursively finds a unique value (or whether there is not a unique one)
|
||||
// from a branch.
|
||||
// uniqueValue: On input, same as for getUniqueValue()/findUniqueValue().
|
||||
// On return, if not 0, then bits 63..33 contain the updated non-negative pos.
|
||||
private static long findUniqueValueFromBranch(byte[] bytes, int pos, int length,
|
||||
long uniqueValue) {
|
||||
while(length>kMaxBranchLinearSubNodeLength) {
|
||||
++pos; // ignore the comparison byte
|
||||
uniqueValue=findUniqueValueFromBranch(bytes, jumpByDelta(bytes, pos), length>>1, uniqueValue);
|
||||
if(uniqueValue==0) {
|
||||
return 0;
|
||||
}
|
||||
length=length-(length>>1);
|
||||
pos=skipDelta(bytes, pos);
|
||||
}
|
||||
do {
|
||||
++pos; // ignore a comparison byte
|
||||
// handle its value
|
||||
int node=bytes[pos++]&0xff;
|
||||
boolean isFinal=(node&kValueIsFinal)!=0;
|
||||
int value=readValue(bytes, pos, node>>1);
|
||||
pos=skipValue(pos, node);
|
||||
if(isFinal) {
|
||||
if(uniqueValue!=0) {
|
||||
if(value!=(int)(uniqueValue>>1)) {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
uniqueValue=((long)value<<1)|1;
|
||||
}
|
||||
} else {
|
||||
uniqueValue=findUniqueValue(bytes, pos+value, uniqueValue);
|
||||
if(uniqueValue==0) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
} while(--length>1);
|
||||
// ignore the last comparison byte
|
||||
return ((long)(pos+1)<<33)|(uniqueValue&0x1ffffffffL);
|
||||
}
|
||||
// Recursively finds a unique value (or whether there is not a unique one)
|
||||
// starting from a position on a node lead byte.
|
||||
// uniqueValue: If there is one, then bits 32..1 contain the value and bit 0 is set.
|
||||
// Otherwise, uniqueValue is 0. Bits 63..33 are ignored.
|
||||
private static long findUniqueValue(byte[] bytes, int pos, long uniqueValue) {
|
||||
for(;;) {
|
||||
int node=bytes[pos++]&0xff;
|
||||
if(node<kMinLinearMatch) {
|
||||
if(node==0) {
|
||||
node=bytes[pos++]&0xff;
|
||||
}
|
||||
uniqueValue=findUniqueValueFromBranch(bytes, pos, node+1, uniqueValue);
|
||||
if(uniqueValue==0) {
|
||||
return 0;
|
||||
}
|
||||
pos=(int)(uniqueValue>>>33);
|
||||
} else if(node<kMinValueLead) {
|
||||
// linear-match node
|
||||
pos+=node-kMinLinearMatch+1; // Ignore the match bytes.
|
||||
} else {
|
||||
boolean isFinal=(node&kValueIsFinal)!=0;
|
||||
int value=readValue(bytes, pos, node>>1);
|
||||
if(uniqueValue!=0) {
|
||||
if(value!=(int)(uniqueValue>>1)) {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
uniqueValue=((long)value<<1)|1;
|
||||
}
|
||||
if(isFinal) {
|
||||
return uniqueValue;
|
||||
}
|
||||
pos=skipValue(pos, node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Helper functions for getNextBytes().
|
||||
// getNextBytes() when pos is on a branch node.
|
||||
private static void getNextBranchBytes(byte[] bytes, int pos, int length, Appendable out) {
|
||||
while(length>kMaxBranchLinearSubNodeLength) {
|
||||
++pos; // ignore the comparison byte
|
||||
getNextBranchBytes(bytes, jumpByDelta(bytes, pos), length>>1, out);
|
||||
length=length-(length>>1);
|
||||
pos=skipDelta(bytes, pos);
|
||||
}
|
||||
do {
|
||||
append(out, bytes[pos++]&0xff);
|
||||
pos=skipValue(bytes, pos);
|
||||
} while(--length>1);
|
||||
append(out, bytes[pos]&0xff);
|
||||
}
|
||||
private static void append(Appendable out, int c) {
|
||||
try {
|
||||
out.append((char)c);
|
||||
} catch(IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
// ByteTrie data structure
|
||||
//
|
||||
// The trie consists of a series of byte-serialized nodes for incremental
|
||||
// string/byte sequence matching. The root node is at the beginning of the trie data.
|
||||
//
|
||||
// Types of nodes are distinguished by their node lead byte ranges.
|
||||
// After each node, except a final-value node, another node follows to
|
||||
// encode match values or continue matching further bytes.
|
||||
//
|
||||
// Node types:
|
||||
// - Value node: Stores a 32-bit integer in a compact, variable-length format.
|
||||
// The value is for the string/byte sequence so far.
|
||||
// One node bit indicates whether the value is final or whether
|
||||
// matching continues with the next node.
|
||||
// - Linear-match node: Matches a number of bytes.
|
||||
// - Branch node: Branches to other nodes according to the current input byte.
|
||||
// The node byte is the length of the branch (number of bytes to select from)
|
||||
// minus 1. It is followed by a sub-node:
|
||||
// - If the length is at most kMaxBranchLinearSubNodeLength, then
|
||||
// there are length-1 (key, value) pairs and then one more comparison byte.
|
||||
// If one of the key bytes matches, then the value is either a final value for
|
||||
// the string/byte sequence so far, or a "jump" delta to the next node.
|
||||
// If the last byte matches, then matching continues with the next node.
|
||||
// (Values have the same encoding as value nodes.)
|
||||
// - If the length is greater than kMaxBranchLinearSubNodeLength, then
|
||||
// there is one byte and one "jump" delta.
|
||||
// If the input byte is less than the sub-node byte, then "jump" by delta to
|
||||
// the next sub-node which will have a length of length/2.
|
||||
// (The delta has its own compact encoding.)
|
||||
// Otherwise, skip the "jump" delta to the next sub-node
|
||||
// which will have a length of length-length/2.
|
||||
|
||||
// Node lead byte values.
|
||||
|
||||
// 00..0f: Branch node. If node!=0 then the length is node+1, otherwise
|
||||
// the length is one more than the next byte.
|
||||
|
||||
// For a branch sub-node with at most this many entries, we drop down
|
||||
// to a linear search.
|
||||
private static final int kMaxBranchLinearSubNodeLength=5;
|
||||
|
||||
// 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node.
|
||||
private static final int kMinLinearMatch=0x10;
|
||||
private static final int kMaxLinearMatchLength=0x10;
|
||||
|
||||
// 20..ff: Variable-length value node.
|
||||
// If odd, the value is final. (Otherwise, intermediate value or jump delta.)
|
||||
// Then shift-right by 1 bit.
|
||||
// The remaining lead byte value indicates the number of following bytes (0..4)
|
||||
// and contains the value's top bits.
|
||||
private static final int kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength; // 0x20
|
||||
// It is a final value if bit 0 is set.
|
||||
private static final int kValueIsFinal=1;
|
||||
|
||||
// Compact value: After testing bit 0, shift right by 1 and then use the following thresholds.
|
||||
private static final int kMinOneByteValueLead=kMinValueLead/2; // 0x10
|
||||
private static final int kMaxOneByteValue=0x40; // At least 6 bits in the first byte.
|
||||
|
||||
private static final int kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1; // 0x51
|
||||
private static final int kMaxTwoByteValue=0x1aff;
|
||||
|
||||
private static final int kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1; // 0x6c
|
||||
private static final int kFourByteValueLead=0x7e;
|
||||
|
||||
// A little more than Unicode code points. (0x11ffff)
|
||||
/*package*/ static final int kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1;
|
||||
|
||||
/*package*/ static final int kFiveByteValueLead=0x7f;
|
||||
|
||||
// Compact delta integers.
|
||||
private static final int kMaxOneByteDelta=0xbf;
|
||||
private static final int kMinTwoByteDeltaLead=kMaxOneByteDelta+1; // 0xc0
|
||||
private static final int kMinThreeByteDeltaLead=0xf0;
|
||||
private static final int kFourByteDeltaLead=0xfe;
|
||||
/*package*/ static final int kFiveByteDeltaLead=0xff;
|
||||
|
||||
/*package*/ static final int kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1; // 0x2fff
|
||||
/*package*/ static final int kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1; // 0xdffff
|
||||
|
||||
// Fixed value referencing the ByteTrie bytes.
|
||||
private byte[] bytes_;
|
||||
private int root_;
|
||||
|
||||
// Iterator variables.
|
||||
|
||||
// Index of next trie byte to read. Negative if no more matches.
|
||||
private int pos_;
|
||||
// Remaining length of a linear-match node, minus 1. Negative if not in such a node.
|
||||
private int remainingMatchLength_;
|
||||
};
|
|
@ -6,6 +6,7 @@
|
|||
* Author: Alan Liu
|
||||
* Created: November 5 2002
|
||||
* Since: ICU 2.4
|
||||
* 2010nov19 Markus Scherer Rewrite for formatVersion 2.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
|
@ -17,7 +18,6 @@ import java.io.IOException;
|
|||
import java.io.InputStream;
|
||||
import java.util.MissingResourceException;
|
||||
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
|
||||
/**
|
||||
|
@ -25,13 +25,12 @@ import com.ibm.icu.lang.UProperty;
|
|||
* imported from icu4c. It contains property and property value
|
||||
* aliases from the UCD files PropertyAliases.txt and
|
||||
* PropertyValueAliases.txt. The file is built by the icu4c tool
|
||||
* genpname. It must be built on an ASCII big-endian platform to be
|
||||
* genpname. It must be an ASCII big-endian file to be
|
||||
* usable in icu4j.
|
||||
*
|
||||
* This class performs two functions.
|
||||
*
|
||||
* (1) It can import the flat binary data into a tree of usable
|
||||
* objects.
|
||||
* (1) It can import the flat binary data into usable objects.
|
||||
*
|
||||
* (2) It provides an API to access the tree of objects.
|
||||
*
|
||||
|
@ -39,170 +38,186 @@ import com.ibm.icu.lang.UProperty;
|
|||
* of icu4c's pnames.icu file.
|
||||
*
|
||||
* Each time a UPropertyAliases is constructed, the pnames.icu file is
|
||||
* read, parsed, and a data tree assembled. Clients should create one
|
||||
* read, parsed, and data structures assembled. Clients should create one
|
||||
* singleton instance and cache it.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @since ICU 2.4
|
||||
*/
|
||||
public final class UPropertyAliases implements ICUBinary.Authenticate {
|
||||
public final class UPropertyAliases {
|
||||
// Byte offsets from the start of the data, after the generic header.
|
||||
private static final int IX_VALUE_MAPS_OFFSET=0;
|
||||
private static final int IX_BYTE_TRIES_OFFSET=1;
|
||||
private static final int IX_NAME_GROUPS_OFFSET=2;
|
||||
private static final int IX_RESERVED3_OFFSET=3;
|
||||
// private static final int IX_RESERVED4_OFFSET=4;
|
||||
// private static final int IX_TOTAL_SIZE=5;
|
||||
|
||||
// Other values.
|
||||
// private static final int IX_MAX_NAME_LENGTH=6;
|
||||
// private static final int IX_RESERVED7=7;
|
||||
// private static final int IX_COUNT=8;
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Runtime data. This is an unflattened representation of the
|
||||
// data in pnames.icu.
|
||||
|
||||
/**
|
||||
* Map from property enum value to nameGroupPool[] index
|
||||
*/
|
||||
private NonContiguousEnumToShort enumToName;
|
||||
private int[] valueMaps;
|
||||
private byte[] byteTries;
|
||||
private String nameGroups;
|
||||
|
||||
/**
|
||||
* Map from property alias to property enum value
|
||||
*/
|
||||
private NameToEnum nameToEnum;
|
||||
private static final class IsAcceptable implements ICUBinary.Authenticate {
|
||||
// @Override when we switch to Java 6
|
||||
public boolean isDataVersionAcceptable(byte version[]) {
|
||||
return version[0]==2;
|
||||
}
|
||||
}
|
||||
private static final IsAcceptable IS_ACCEPTABLE=new IsAcceptable();
|
||||
private static final byte DATA_FORMAT[]={ 0x70, 0x6E, 0x61, 0x6D }; // "pnam"
|
||||
|
||||
/**
|
||||
* Map from property enum value to valueMapArray[] index
|
||||
*/
|
||||
private NonContiguousEnumToShort enumToValue;
|
||||
private void load(InputStream data) throws IOException {
|
||||
BufferedInputStream bis=new BufferedInputStream(data);
|
||||
//dataVersion=ICUBinary.readHeaderAndDataVersion(bis, DATA_FORMAT, IS_ACCEPTABLE);
|
||||
ICUBinary.readHeader(bis, DATA_FORMAT, IS_ACCEPTABLE);
|
||||
DataInputStream ds=new DataInputStream(bis);
|
||||
int indexesLength=ds.readInt()/4; // inIndexes[IX_VALUE_MAPS_OFFSET]/4
|
||||
if(indexesLength<8) { // formatVersion 2 initially has 8 indexes
|
||||
throw new IOException("pnames.icu: not enough indexes");
|
||||
}
|
||||
int[] inIndexes=new int[indexesLength];
|
||||
inIndexes[0]=indexesLength*4;
|
||||
for(int i=1; i<indexesLength; ++i) {
|
||||
inIndexes[i]=ds.readInt();
|
||||
}
|
||||
|
||||
/**
|
||||
* Each entry represents a binary or enumerated property
|
||||
*/
|
||||
private ValueMap valueMapArray[];
|
||||
// Read the valueMaps.
|
||||
int offset=inIndexes[IX_VALUE_MAPS_OFFSET];
|
||||
int nextOffset=inIndexes[IX_BYTE_TRIES_OFFSET];
|
||||
int numInts=(nextOffset-offset)/4;
|
||||
valueMaps=new int[numInts];
|
||||
for(int i=0; i<numInts; ++i) {
|
||||
valueMaps[i]=ds.readInt();
|
||||
}
|
||||
|
||||
/**
|
||||
* Pool of concatenated integer runs. Each run contains one
|
||||
* or more entries. The last entry of the run is negative.
|
||||
* A zero entry indicates "n/a" in the Property*Aliases.txt.
|
||||
* Each entry is a stringPool[] index.
|
||||
*/
|
||||
private short nameGroupPool[];
|
||||
// Read the byteTries.
|
||||
offset=nextOffset;
|
||||
nextOffset=inIndexes[IX_NAME_GROUPS_OFFSET];
|
||||
int numBytes=nextOffset-offset;
|
||||
byteTries=new byte[numBytes];
|
||||
ds.readFully(byteTries);
|
||||
|
||||
/**
|
||||
* Pool of strings.
|
||||
*/
|
||||
private String stringPool[];
|
||||
// Read the nameGroups and turn them from ASCII bytes into a Java String.
|
||||
offset=nextOffset;
|
||||
nextOffset=inIndexes[IX_RESERVED3_OFFSET];
|
||||
numBytes=nextOffset-offset;
|
||||
StringBuilder sb=new StringBuilder(numBytes);
|
||||
for(int i=0; i<numBytes; ++i) {
|
||||
sb.append((char)ds.readByte());
|
||||
}
|
||||
nameGroups=sb.toString();
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Constants
|
||||
data.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Debug flag (not really constant)
|
||||
*/
|
||||
private static boolean DEBUG = ICUDebug.enabled("pnames");
|
||||
|
||||
/**
|
||||
* File format that this class understands.
|
||||
* See icu4c/src/common/propname.h.
|
||||
*/
|
||||
private static final byte DATA_FORMAT_ID[] = {'p', 'n', 'a', 'm'};
|
||||
|
||||
/**
|
||||
* File version that this class understands.
|
||||
* See icu4c/src/common/propname.h.
|
||||
*/
|
||||
private static final byte DATA_FORMAT_VERSION = 1;
|
||||
|
||||
/**
|
||||
* Name of the datafile
|
||||
*/
|
||||
private static final String DATA_FILE_NAME = ICUResourceBundle.ICU_BUNDLE+"/pnames.icu";
|
||||
|
||||
/**
|
||||
* Buffer size of datafile. The whole file is < 16k.
|
||||
*/
|
||||
private static final int DATA_BUFFER_SIZE = 8192;
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Constructor
|
||||
|
||||
/**
|
||||
* Constructs a UPropertyAliases object. The binary file
|
||||
* DATA_FILE_NAME is read from the jar/classpath and unflattened
|
||||
* into member variables of this object.
|
||||
*/
|
||||
private UPropertyAliases() throws IOException {
|
||||
load(ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/pnames.icu"));
|
||||
}
|
||||
|
||||
// Open the .icu file from the jar/classpath
|
||||
InputStream is = ICUData.getRequiredStream(DATA_FILE_NAME);
|
||||
BufferedInputStream b = new BufferedInputStream(is, DATA_BUFFER_SIZE);
|
||||
// Read and discard Unicode version...
|
||||
/* byte unicodeVersion[] = */ICUBinary.readHeader(b, DATA_FORMAT_ID, this);
|
||||
DataInputStream d = new DataInputStream(b);
|
||||
|
||||
// Record the origin position of the file. Keep enough around
|
||||
// to seek back to the start of the header.
|
||||
d.mark(256);
|
||||
|
||||
short enumToName_offset = d.readShort();
|
||||
short nameToEnum_offset = d.readShort();
|
||||
short enumToValue_offset = d.readShort();
|
||||
short total_size = d.readShort();
|
||||
short valueMap_offset = d.readShort();
|
||||
short valueMap_count = d.readShort();
|
||||
short nameGroupPool_offset = d.readShort();
|
||||
short nameGroupPool_count = d.readShort();
|
||||
short stringPool_offset = d.readShort();
|
||||
short stringPool_count = d.readShort();
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(
|
||||
"enumToName_offset=" + enumToName_offset + "\n" +
|
||||
"nameToEnum_offset=" + nameToEnum_offset + "\n" +
|
||||
"enumToValue_offset=" + enumToValue_offset + "\n" +
|
||||
"total_size=" + total_size + "\n" +
|
||||
"valueMap_offset=" + valueMap_offset + "\n" +
|
||||
"valueMap_count=" + valueMap_count + "\n" +
|
||||
"nameGroupPool_offset=" + nameGroupPool_offset + "\n" +
|
||||
"nameGroupPool_count=" + nameGroupPool_count + "\n" +
|
||||
"stringPool_offset=" + stringPool_offset + "\n" +
|
||||
"stringPool_count=" + stringPool_count);
|
||||
private int findProperty(int property) {
|
||||
int i=1; // valueMaps index, initially after numRanges
|
||||
for(int numRanges=valueMaps[0]; numRanges>0; --numRanges) {
|
||||
// Read and skip the start and limit of this range.
|
||||
int start=valueMaps[i];
|
||||
int limit=valueMaps[i+1];
|
||||
i+=2;
|
||||
if(property<start) {
|
||||
break;
|
||||
}
|
||||
if(property<limit) {
|
||||
return i+(property-start)*2;
|
||||
}
|
||||
i+=(limit-start)*2; // Skip all entries for this range.
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Read it all (less than 32k). Seeking around (using
|
||||
// mark/reset/skipBytes) doesn't work directly on the file,
|
||||
// but it works fine if we read everything into a byte[] array
|
||||
// first.
|
||||
byte raw[] = new byte[total_size];
|
||||
d.reset();
|
||||
d.readFully(raw);
|
||||
d.close();
|
||||
|
||||
Builder builder = new Builder(raw);
|
||||
|
||||
stringPool = builder.readStringPool(stringPool_offset,
|
||||
stringPool_count);
|
||||
|
||||
nameGroupPool = builder.readNameGroupPool(nameGroupPool_offset,
|
||||
nameGroupPool_count);
|
||||
|
||||
builder.setupValueMap_map(valueMap_offset, valueMap_count);
|
||||
|
||||
// Some of the following data structures have to be set up
|
||||
// here, _not_ in Builder. That's because they are instances
|
||||
// of non-static inner classes, and they contain implicit
|
||||
// references to this.
|
||||
|
||||
builder.seek(enumToName_offset);
|
||||
enumToName = new NonContiguousEnumToShort(builder);
|
||||
builder.nameGroupOffsetToIndex(enumToName.offsetArray);
|
||||
|
||||
builder.seek(nameToEnum_offset);
|
||||
nameToEnum = new NameToEnum(builder);
|
||||
|
||||
builder.seek(enumToValue_offset);
|
||||
enumToValue = new NonContiguousEnumToShort(builder);
|
||||
builder.valueMapOffsetToIndex(enumToValue.offsetArray);
|
||||
|
||||
valueMapArray = new ValueMap[valueMap_count];
|
||||
for (int i=0; i<valueMap_count; ++i) {
|
||||
// Must seek to the start of each entry.
|
||||
builder.seek(builder.valueMap_map[i]);
|
||||
valueMapArray[i] = new ValueMap(builder);
|
||||
private int findPropertyValueNameGroup(int valueMapIndex, int value) {
|
||||
if(valueMapIndex==0) {
|
||||
return 0; // The property does not have named values.
|
||||
}
|
||||
++valueMapIndex; // Skip the ByteTrie offset.
|
||||
int numRanges=valueMaps[valueMapIndex++];
|
||||
if(numRanges<0x10) {
|
||||
// Ranges of values.
|
||||
for(; numRanges>0; --numRanges) {
|
||||
// Read and skip the start and limit of this range.
|
||||
int start=valueMaps[valueMapIndex];
|
||||
int limit=valueMaps[valueMapIndex+1];
|
||||
valueMapIndex+=2;
|
||||
if(value<start) {
|
||||
break;
|
||||
}
|
||||
if(value<limit) {
|
||||
return valueMaps[valueMapIndex+value-start];
|
||||
}
|
||||
valueMapIndex+=limit-start; // Skip all entries for this range.
|
||||
}
|
||||
} else {
|
||||
// List of values.
|
||||
int valuesStart=valueMapIndex;
|
||||
int nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
|
||||
do {
|
||||
int v=valueMaps[valueMapIndex];
|
||||
if(value<v) {
|
||||
break;
|
||||
}
|
||||
if(value==v) {
|
||||
return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
|
||||
}
|
||||
} while(++valueMapIndex<nameGroupOffsetsStart);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
builder.close();
|
||||
private String getName(int nameGroupsIndex, int nameIndex) {
|
||||
int numNames=nameGroups.charAt(nameGroupsIndex++);
|
||||
if(nameIndex<0 || numNames<=nameIndex) {
|
||||
throw new IllegalIcuArgumentException("Invalid property (value) name choice");
|
||||
}
|
||||
// Skip nameIndex names.
|
||||
for(; nameIndex>0; --nameIndex) {
|
||||
while(0!=nameGroups.charAt(nameGroupsIndex++)) {}
|
||||
}
|
||||
// Find the end of this name.
|
||||
int nameStart=nameGroupsIndex;
|
||||
while(0!=nameGroups.charAt(nameGroupsIndex)) {
|
||||
++nameGroupsIndex;
|
||||
}
|
||||
if(nameStart==nameGroupsIndex) {
|
||||
return null; // no name (Property[Value]Aliases.txt has "n/a")
|
||||
}
|
||||
return nameGroups.substring(nameStart, nameGroupsIndex);
|
||||
}
|
||||
|
||||
private static int asciiToLowercase(int c) {
|
||||
return 'A'<=c && c<='Z' ? c+0x20 : c;
|
||||
}
|
||||
|
||||
private boolean containsName(ByteTrie trie, CharSequence name) {
|
||||
ByteTrie.Result result=ByteTrie.Result.NO_VALUE;
|
||||
for(int i=0; i<name.length(); ++i) {
|
||||
int c=name.charAt(i);
|
||||
// Ignore delimiters '-', '_', and ASCII White_Space.
|
||||
if(c=='-' || c=='_' || c==' ' || (0x09<=c && c<=0x0d)) {
|
||||
continue;
|
||||
}
|
||||
if(!result.hasNext()) {
|
||||
return false;
|
||||
}
|
||||
c=asciiToLowercase(c);
|
||||
result=trie.next(c);
|
||||
}
|
||||
return result.hasValue();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
@ -215,194 +230,86 @@ public final class UPropertyAliases implements ICUBinary.Authenticate {
|
|||
INSTANCE = new UPropertyAliases();
|
||||
} catch(IOException e) {
|
||||
///CLOVER:OFF
|
||||
throw new MissingResourceException("Could not construct UPropertyAliases. Missing pnames.icu","","");
|
||||
MissingResourceException mre = new MissingResourceException(
|
||||
"Could not construct UPropertyAliases. Missing pnames.icu", "", "");
|
||||
mre.initCause(e);
|
||||
throw mre;
|
||||
///CLOVER:ON
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a property name given a property enum. Multiple
|
||||
* names may be available for each property; the nameChoice
|
||||
* selects among them.
|
||||
* Returns a property name given a property enum.
|
||||
* Multiple names may be available for each property;
|
||||
* the nameChoice selects among them.
|
||||
*/
|
||||
public String getPropertyName(int property,
|
||||
int nameChoice) {
|
||||
short nameGroupIndex = enumToName.getShort(property);
|
||||
return chooseNameInGroup(nameGroupIndex, nameChoice);
|
||||
public String getPropertyName(int property, int nameChoice) {
|
||||
int valueMapIndex=findProperty(property);
|
||||
if(valueMapIndex==0) {
|
||||
throw new IllegalArgumentException(
|
||||
"Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
|
||||
}
|
||||
return getName(valueMaps[valueMapIndex], nameChoice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a property enum given one of its property names.
|
||||
* Returns a value name given a property enum and a value enum.
|
||||
* Multiple names may be available for each value;
|
||||
* the nameChoice selects among them.
|
||||
*/
|
||||
public String getPropertyValueName(int property, int value, int nameChoice) {
|
||||
int valueMapIndex=findProperty(property);
|
||||
if(valueMapIndex==0) {
|
||||
throw new IllegalArgumentException(
|
||||
"Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
|
||||
}
|
||||
int nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
|
||||
if(nameGroupOffset==0) {
|
||||
throw new IllegalArgumentException(
|
||||
"Property "+property+" (0x"+Integer.toHexString(property)+
|
||||
") does not have named values");
|
||||
}
|
||||
return getName(nameGroupOffset, nameChoice);
|
||||
}
|
||||
|
||||
private int getPropertyOrValueEnum(int byteTrieOffset, CharSequence alias) {
|
||||
ByteTrie trie=new ByteTrie(byteTries, byteTrieOffset);
|
||||
if(containsName(trie, alias)) {
|
||||
return trie.getValue();
|
||||
} else {
|
||||
return UProperty.UNDEFINED;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a property enum given one of its property names.
|
||||
* If the property name is not known, this method returns
|
||||
* UProperty.UNDEFINED.
|
||||
*/
|
||||
public int getPropertyEnum(String propertyAlias) {
|
||||
return nameToEnum.getEnum(propertyAlias);
|
||||
public int getPropertyEnum(CharSequence alias) {
|
||||
return getPropertyOrValueEnum(0, alias);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a value name given a property enum and a value enum.
|
||||
* Multiple names may be available for each value; the nameChoice
|
||||
* selects among them.
|
||||
* Returns a value enum given a property enum and one of its value names.
|
||||
*/
|
||||
public String getPropertyValueName(int property,
|
||||
int value,
|
||||
int nameChoice) {
|
||||
ValueMap vm = getValueMap(property);
|
||||
short nameGroupIndex = vm.enumToName.getShort(value);
|
||||
return chooseNameInGroup(nameGroupIndex, nameChoice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a value enum given one of its value names and the
|
||||
* corresponding property alias.
|
||||
*/
|
||||
public int getPropertyValueEnum(int property,
|
||||
String valueAlias) {
|
||||
ValueMap vm = getValueMap(property);
|
||||
return vm.nameToEnum.getEnum(valueAlias);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Data structures
|
||||
|
||||
/**
|
||||
* A map for the legal values of a binary or enumerated property.
|
||||
*/
|
||||
private class ValueMap {
|
||||
|
||||
/**
|
||||
* Maps value enum to index into the nameGroupPool[]
|
||||
*/
|
||||
EnumToShort enumToName; // polymorphic
|
||||
|
||||
/**
|
||||
* Maps value name to value enum.
|
||||
*/
|
||||
NameToEnum nameToEnum;
|
||||
|
||||
ValueMap(Builder b) throws IOException {
|
||||
short enumToName_offset = b.readShort();
|
||||
short ncEnumToName_offset = b.readShort();
|
||||
short nameToEnum_offset = b.readShort();
|
||||
if (enumToName_offset != 0) {
|
||||
b.seek(enumToName_offset);
|
||||
ContiguousEnumToShort x = new ContiguousEnumToShort(b);
|
||||
b.nameGroupOffsetToIndex(x.offsetArray);
|
||||
enumToName = x;
|
||||
} else {
|
||||
b.seek(ncEnumToName_offset);
|
||||
NonContiguousEnumToShort x = new NonContiguousEnumToShort(b);
|
||||
b.nameGroupOffsetToIndex(x.offsetArray);
|
||||
enumToName = x;
|
||||
}
|
||||
b.seek(nameToEnum_offset);
|
||||
nameToEnum = new NameToEnum(b);
|
||||
public int getPropertyValueEnum(int property, CharSequence alias) {
|
||||
int valueMapIndex=findProperty(property);
|
||||
if(valueMapIndex==0) {
|
||||
throw new IllegalArgumentException(
|
||||
"Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
|
||||
}
|
||||
valueMapIndex=valueMaps[valueMapIndex+1];
|
||||
if(valueMapIndex==0) {
|
||||
throw new IllegalArgumentException(
|
||||
"Property "+property+" (0x"+Integer.toHexString(property)+
|
||||
") does not have named values");
|
||||
}
|
||||
// valueMapIndex is the start of the property's valueMap,
|
||||
// where the first word is the ByteTrie offset.
|
||||
return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
|
||||
}
|
||||
|
||||
/**
|
||||
* Abstract map from enum values to integers.
|
||||
*/
|
||||
private interface EnumToShort {
|
||||
// Returns the short stored for enumProbe. Both implementations in this file throw IllegalIcuArgumentException when enumProbe is not mapped.
short getShort(int enumProbe);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generic map from enum values to offsets. Enum values are
|
||||
* contiguous.
|
||||
*/
|
||||
private static class ContiguousEnumToShort implements EnumToShort {
|
||||
int enumStart;
|
||||
int enumLimit;
|
||||
short offsetArray[];
|
||||
|
||||
public short getShort(int enumProbe) {
|
||||
if (enumProbe < enumStart || enumProbe >= enumLimit) {
|
||||
throw new IllegalIcuArgumentException("Invalid enum. enumStart = " +enumStart +
|
||||
" enumLimit = " + enumLimit +
|
||||
" enumProbe = " + enumProbe );
|
||||
}
|
||||
return offsetArray[enumProbe - enumStart];
|
||||
}
|
||||
|
||||
ContiguousEnumToShort(ICUBinaryStream s) throws IOException {
|
||||
enumStart = s.readInt();
|
||||
enumLimit = s.readInt();
|
||||
int count = enumLimit - enumStart;
|
||||
offsetArray = new short[count];
|
||||
for (int i=0; i<count; ++i) {
|
||||
offsetArray[i] = s.readShort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generic map from enum values to offsets. Enum values need not
|
||||
* be contiguous.
|
||||
*/
|
||||
private static class NonContiguousEnumToShort implements EnumToShort {
|
||||
int enumArray[];
|
||||
short offsetArray[];
|
||||
|
||||
public short getShort(int enumProbe) {
|
||||
for (int i=0; i<enumArray.length; ++i) {
|
||||
if (enumArray[i] < enumProbe) continue;
|
||||
if (enumArray[i] > enumProbe) break;
|
||||
return offsetArray[i];
|
||||
}
|
||||
throw new IllegalIcuArgumentException("Invalid enum");
|
||||
}
|
||||
|
||||
NonContiguousEnumToShort(ICUBinaryStream s) throws IOException {
|
||||
int i;
|
||||
int count = s.readInt();
|
||||
enumArray = new int[count];
|
||||
offsetArray = new short[count];
|
||||
for (i=0; i<count; ++i) {
|
||||
enumArray[i] = s.readInt();
|
||||
}
|
||||
for (i=0; i<count; ++i) {
|
||||
offsetArray[i] = s.readShort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Map from names to enum values.
|
||||
*/
|
||||
private class NameToEnum {
|
||||
int enumArray[];
|
||||
short nameArray[];
|
||||
|
||||
int getEnum(String nameProbe) {
|
||||
for (int i=0; i<nameArray.length; ++i) {
|
||||
int c = UPropertyAliases.compare(nameProbe,
|
||||
stringPool[nameArray[i]]);
|
||||
if (c > 0) continue;
|
||||
if (c < 0) break;
|
||||
return enumArray[i];
|
||||
}
|
||||
return UProperty.UNDEFINED;
|
||||
}
|
||||
|
||||
NameToEnum(Builder b) throws IOException {
|
||||
int i;
|
||||
int count = b.readInt();
|
||||
enumArray = new int[count];
|
||||
nameArray = new short[count];
|
||||
for (i=0; i<count; ++i) {
|
||||
enumArray[i] = b.readInt();
|
||||
}
|
||||
for (i=0; i<count; ++i) {
|
||||
nameArray[i] = b.stringOffsetToIndex(b.readShort());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Runtime implementation
|
||||
|
||||
/**
|
||||
* Compare two property names, returning <0, 0, or >0. The
|
||||
* comparison is that described as "loose" matching in the
|
||||
|
@ -447,7 +354,7 @@ public final class UPropertyAliases implements ICUBinary.Authenticate {
|
|||
cstrb = 0;
|
||||
}
|
||||
|
||||
rc = UCharacter.toLowerCase(cstra) - UCharacter.toLowerCase(cstrb);
|
||||
rc = asciiToLowercase(cstra) - asciiToLowercase(cstrb);
|
||||
if (rc != 0) {
|
||||
return rc;
|
||||
}
|
||||
|
@ -456,218 +363,4 @@ public final class UPropertyAliases implements ICUBinary.Authenticate {
|
|||
++istrb;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given an index to a run within the nameGroupPool[], and a
|
||||
* nameChoice (0,1,...), select the nameChoice-th entry of the run.
|
||||
*/
|
||||
private String chooseNameInGroup(short nameGroupIndex, int nameChoice) {
|
||||
if (nameChoice < 0) {
|
||||
throw new IllegalIcuArgumentException("Invalid name choice");
|
||||
}
|
||||
while (nameChoice-- > 0) {
|
||||
if (nameGroupPool[nameGroupIndex++] < 0) {
|
||||
throw new IllegalIcuArgumentException("Invalid name choice");
|
||||
}
|
||||
}
|
||||
short a = nameGroupPool[nameGroupIndex];
|
||||
return stringPool[(a < 0) ? -a : a];
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the valueMap[] entry for a given property.
|
||||
*/
|
||||
private ValueMap getValueMap(int property) {
|
||||
int valueMapIndex = enumToValue.getShort(property);
|
||||
return valueMapArray[valueMapIndex];
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// ICUBinary API
|
||||
|
||||
/**
|
||||
* Return true if the given data version can be used.
|
||||
*/
|
||||
public boolean isDataVersionAcceptable(byte version[]) {
|
||||
return version[0] == DATA_FORMAT_VERSION;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Builder
|
||||
|
||||
/**
|
||||
* A specialized ICUBinaryStream that can map between offsets and
|
||||
* index values into various arrays (stringPool, nameGroupPool,
|
||||
* and valueMap). It also knows how to read various structures.
|
||||
*/
|
||||
static class Builder extends ICUBinaryStream {
|
||||
|
||||
// map[i] = offset of object i. We need maps for all of our
|
||||
// arrays. The arrays are indexed by offset in the raw binary
|
||||
// file; we need to translate that to index.
|
||||
|
||||
private short stringPool_map[];
|
||||
|
||||
private short valueMap_map[];
|
||||
|
||||
private short nameGroup_map[];
|
||||
|
||||
public Builder(byte raw[]) {
|
||||
super(raw);
|
||||
}
|
||||
|
||||
/**
|
||||
* The valueMap_map[] must be setup in advance. This method
|
||||
* does that.
|
||||
*/
|
||||
public void setupValueMap_map(short offset, short count) {
|
||||
valueMap_map = new short[count];
|
||||
for (int i=0; i<count; ++i) {
|
||||
// Start of each entry. Each entry is 6 bytes long.
|
||||
valueMap_map[i] = (short) (offset + i * 6);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read stringPool[]. Build up translation table from offsets
|
||||
* to string indices (stringPool_map[]).
|
||||
*/
|
||||
public String[] readStringPool(short offset, short count)
|
||||
throws IOException {
|
||||
seek(offset);
|
||||
// Allocate one more stringPool entry than needed. Use this
|
||||
// to store a "no string" entry in the pool, at index 0. This
|
||||
// maps to offset 0, so let stringPool_map[0] = 0.
|
||||
String stringPool[] = new String[count + 1];
|
||||
stringPool_map = new short[count + 1];
|
||||
short pos = offset;
|
||||
StringBuilder buf = new StringBuilder();
|
||||
stringPool_map[0] = 0;
|
||||
for (int i=1; i<=count; ++i) {
|
||||
buf.setLength(0);
|
||||
for (;;) {
|
||||
// This works because the name is invariant-ASCII
|
||||
char c = (char) readUnsignedByte();
|
||||
if (c == 0) break;
|
||||
buf.append(c);
|
||||
}
|
||||
stringPool_map[i] = pos;
|
||||
stringPool[i] = buf.toString();
|
||||
pos += stringPool[i].length() + 1;
|
||||
}
|
||||
if (DEBUG) {
|
||||
System.out.println("read stringPool x " + count +
|
||||
": " + stringPool[1] + ", " +
|
||||
stringPool[2] + ", " +
|
||||
stringPool[3] + ",...");
|
||||
}
|
||||
return stringPool;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the nameGroupPool[], and build up the offset->index
|
||||
* map (nameGroupPool_map[]).
|
||||
*/
|
||||
public short[] readNameGroupPool(short offset, short count)
|
||||
throws IOException {
|
||||
// Read nameGroupPool[]. This contains offsets from start of
|
||||
// header. We translate these into indices into stringPool[]
|
||||
// on the fly. The offset 0, which indicates "no entry", we
|
||||
// translate into index 0, which contains a null String
|
||||
// pointer.
|
||||
seek(offset);
|
||||
short pos = offset;
|
||||
short nameGroupPool[] = new short[count];
|
||||
nameGroup_map = new short[count];
|
||||
for (int i=0; i<count; ++i) {
|
||||
nameGroup_map[i] = pos;
|
||||
nameGroupPool[i] = stringOffsetToIndex(readShort());
|
||||
pos += 2;
|
||||
}
|
||||
if (DEBUG) {
|
||||
System.out.println("read nameGroupPool x " + count +
|
||||
": " + nameGroupPool[0] + ", " +
|
||||
nameGroupPool[1] + ", " +
|
||||
nameGroupPool[2] + ",...");
|
||||
}
|
||||
return nameGroupPool;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert an offset into the string pool into a stringPool[]
|
||||
* index.
|
||||
*/
|
||||
private short stringOffsetToIndex(short offset) {
|
||||
int probe = offset;
|
||||
if (probe < 0) probe = -probe;
|
||||
for (int i=0; i<stringPool_map.length; ++i) {
|
||||
if (stringPool_map[i] == probe) {
|
||||
return (short) ((offset < 0) ? -i : i);
|
||||
}
|
||||
}
|
||||
throw new IllegalStateException("Can't map string pool offset " +
|
||||
offset + " to index");
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert an array of offsets into the string pool into an
|
||||
* array of stringPool[] indices. MODIFIES THE ARRAY IN
|
||||
* PLACE.
|
||||
*/
|
||||
/* private void stringOffsetToIndex(short array[]) {
|
||||
for (int i=0; i<array.length; ++i) {
|
||||
array[i] = stringOffsetToIndex(array[i]);
|
||||
}
|
||||
}*/
|
||||
|
||||
/**
|
||||
* Convert an offset into the value map into a valueMap[]
|
||||
* index.
|
||||
*/
|
||||
private short valueMapOffsetToIndex(short offset) {
|
||||
for (short i=0; i<valueMap_map.length; ++i) {
|
||||
if (valueMap_map[i] == offset) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
throw new IllegalStateException("Can't map value map offset " +
|
||||
offset + " to index");
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert an array of offsets into the value map array into
|
||||
* an array of valueMap[] indices. MODIFIES THE ARRAY IN
|
||||
* PLACE.
|
||||
*/
|
||||
private void valueMapOffsetToIndex(short array[]) {
|
||||
for (int i=0; i<array.length; ++i) {
|
||||
array[i] = valueMapOffsetToIndex(array[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert an offset into the name group pool into a
|
||||
* nameGroupPool[] index.
|
||||
*/
|
||||
private short nameGroupOffsetToIndex(short offset) {
|
||||
for (short i=0; i<nameGroup_map.length; ++i) {
|
||||
if (nameGroup_map[i] == offset) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
throw new RuntimeException("Can't map name group offset " + offset +
|
||||
" to index");
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert an array of offsets into the name group pool into an
|
||||
* array of nameGroupPool[] indices. MODIFIES THE ARRAY IN
|
||||
* PLACE.
|
||||
*/
|
||||
private void nameGroupOffsetToIndex(short array[]) {
|
||||
for (int i=0; i<array.length; ++i) {
|
||||
array[i] = nameGroupOffsetToIndex(array[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4206,7 +4206,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
* @see UProperty
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
public static int getPropertyEnum(String propertyAlias) {
|
||||
public static int getPropertyEnum(CharSequence propertyAlias) {
|
||||
int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias);
|
||||
if (propEnum == UProperty.UNDEFINED) {
|
||||
throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias);
|
||||
|
@ -4318,7 +4318,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
* selector or valueAlias is not a value of this property
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
public static int getPropertyValueEnum(int property, String valueAlias) {
|
||||
public static int getPropertyValueEnum(int property, CharSequence valueAlias) {
|
||||
int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias);
|
||||
if (propEnum == UProperty.UNDEFINED) {
|
||||
throw new IllegalIcuArgumentException("Invalid name: " + valueAlias);
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:bbc7ef8ee9ef1d9992f7a2836e8e6ada320802fbc64bfc8d48966ce705581781
|
||||
size 6739159
|
||||
oid sha256:a5ad97a089e050870cd3e20b5b0c2d38651637e576c9020edc55431e62c31b56
|
||||
size 6740512
|
||||
|
|
Loading…
Add table
Reference in a new issue