mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-5117 Remove unused BreakDictionary and ucmp8 APIs.
X-SVN-Rev: 19427
This commit is contained in:
parent
fb73d7b9a9
commit
64483b9dd3
10 changed files with 19 additions and 1468 deletions
|
@ -61,25 +61,25 @@ LDFLAGS += $(LDFLAGSICUUC)
|
|||
# $(LIBICUDT) is either stub data or the real DLL common data.
|
||||
LIBS = $(LIBICUDT) $(DEFAULT_LIBS)
|
||||
|
||||
OBJECTS = putil.o utypes.o uobject.o cmemory.o umutex.o ucln_cmn.o uinit.o \
|
||||
udata.o ucmndata.o udatamem.o udataswp.o umapfile.o ucol_swp.o \
|
||||
uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
|
||||
ucat.o locmap.o uloc.o locid.o \
|
||||
uhash.o uhash_us.o \
|
||||
ucnv.o ucnv_set.o ucnv_bld.o ucnv_cb.o ucnv_cnv.o ucnv_err.o ucnv_ext.o ucnv_io.o ucnvlat1.o \
|
||||
OBJECTS = putil.o utypes.o uinvchar.o umutex.o ucln_cmn.o uinit.o uobject.o cmemory.o \
|
||||
udata.o ucmndata.o udatamem.o umapfile.o udataswp.o ucol_swp.o utrace.o \
|
||||
uhash.o uhash_us.o uenum.o ustrenum.o uvector.o ustack.o uvectr32.o \
|
||||
ucnv.o ucnv_bld.o ucnv_cnv.o ucnv_io.o ucnv_cb.o ucnv_err.o ucnvlat1.o \
|
||||
ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
|
||||
ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o \
|
||||
unistr.o unistr_case.o unistr_cnv.o unistr_props.o \
|
||||
utf_impl.o ustring.o ustr_cnv.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o uinvchar.o utext.o \
|
||||
ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o \
|
||||
uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
|
||||
ucat.o locmap.o uloc.o locid.o locutil.o \
|
||||
ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
|
||||
utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
|
||||
normlzr.o unorm.o unormcmp.o unorm_it.o chariter.o schriter.o uchriter.o uiter.o \
|
||||
uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o unames.o \
|
||||
uscript.o usc_impl.o uvector.o ustack.o uvectr32.o ucmp8.o \
|
||||
uarrsort.o utrie.o uset.o uset_props.o uniset.o uniset_props.o ruleiter.o caniter.o unifilt.o unifunct.o usetiter.o \
|
||||
brkiter.o ubrk.o brkeng.o dictbe.o triedict.o \
|
||||
uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
|
||||
uscript.o usc_impl.o unames.o \
|
||||
utrie.o uset_props.o uniset_props.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
|
||||
uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o triedict.o \
|
||||
rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \
|
||||
serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o locutil.o \
|
||||
uenum.o ustrenum.o uidna.o usprep.o punycode.o \
|
||||
util.o parsepos.o utrace.o locbased.o cwchar.o wintz.o
|
||||
serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
|
||||
uidna.o usprep.o punycode.o \
|
||||
util.o parsepos.o locbased.o cwchar.o wintz.o
|
||||
|
||||
## Header files to install
|
||||
HEADERS = $(srcdir)/unicode/*.h unicode/*.h
|
||||
|
|
|
@ -1,242 +0,0 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2004 IBM and others. All rights reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 12/1/99 rtg Ported from Java
|
||||
* 01/13/2000 helena Added UErrorCode to ctors.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/ures.h"
|
||||
#include "brkdict.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
//=================================================================================
|
||||
// deserialization
|
||||
//=================================================================================
|
||||
|
||||
/**
 * Builds the dictionary from the "BreakDictionaryData" binary resource of the
 * "th" resource bundle.  The file-name argument is accepted for interface
 * compatibility only and is not used.
 *
 * @param status  In/out error code.  Nothing is loaded if it already holds a
 *                failure; on return it reports any resource-lookup failure.
 */
BreakDictionary::BreakDictionary(const char* /*dictionaryFilename*/, UErrorCode& status)
  : columnMap(NULL),
    numCols(0),
    numColGroups(0),
    table(NULL),
    rowIndex(NULL),
    rowIndexFlags(NULL),
    rowIndexFlagsIndex(NULL),
    rowIndexShifts(NULL)
{
    if (U_FAILURE(status)) {
        return;
    }

    UResourceBundle *th_dict = ures_open(NULL, "th", &status);
    th_dict = ures_getByKey(th_dict, "BreakDictionaryData", th_dict, &status);
    if (U_FAILURE(status)) {
        // The bundle may have been opened even though the key lookup failed;
        // release it rather than leaking it.
        ures_close(th_dict);
        return;
    }

    int32_t len = 0;
    const uint8_t * data = ures_getBinary(th_dict, &len, &status);

    // The binary data belongs to the bundle, so consume it before the bundle
    // is closed.
    if (U_SUCCESS(status) && data != NULL) {
        readDictionaryFile(data);
    }
    ures_close(th_dict);
}
|
||||
|
||||
/**
 * Releases every table owned by the dictionary.
 */
BreakDictionary::~BreakDictionary()
{
    // Plain heap buffers filled in by readDictionaryFile().
    uprv_free(rowIndexShifts);
    uprv_free(rowIndexFlagsIndex);
    uprv_free(rowIndexFlags);
    uprv_free(rowIndex);
    uprv_free(table);

    // The column map is a CompactByteArray and has its own close routine.
    ucmp8_close(columnMap);
}
|
||||
|
||||
// Macros supporting readDictionaryFile().  The dictionary data originated from
// a Java program, and Java always writes data in big-endian order, so on
// little-endian platforms every multi-byte value must be byte-swapped in place.
//
// SWAP32(x) / SWAP16(x) swap the bytes of the lvalue x in place (no-ops on
// big-endian platforms).  The argument is converted to an unsigned type before
// shifting so that negative values are never left-shifted.

#if U_IS_BIG_ENDIAN
#define SWAP32(x)
#define SWAP16(x)
#else
#define SWAP32(x) (x) = (uint32_t)((((uint32_t)(x) >> 24) & 0xff) | \
                                   (((uint32_t)(x) >> 8) & 0xff00) | \
                                   (((uint32_t)(x) << 8) & 0xff0000) | \
                                   (((uint32_t)(x) << 24) & 0xff000000))
#define SWAP16(x) (x) = (uint16_t)((((uint16_t)(x) << 8) & 0xff00) | \
                                   (((uint16_t)(x) >> 8) & 0xff))
#endif

// Copies len bytes from source to destAddr and advances source past them.
// Wrapped in do/while(0) so that it behaves as a single statement.
#define DICTIONARY_READ(source, destAddr, len) \
    do { \
        uprv_memcpy(destAddr, source, len); \
        (source) += (len); \
    } while (0)
|
||||
|
||||
|
||||
void
|
||||
BreakDictionary::readDictionaryFile(const uint8_t * in)
|
||||
{
|
||||
int32_t l;
|
||||
int32_t version;
|
||||
|
||||
int i;
|
||||
|
||||
// read in the version number (right now we just ignore it)
|
||||
DICTIONARY_READ(in, &version, 4);
|
||||
|
||||
// read in the column map (this is serialized in its internal form:
|
||||
// an index array followed by a data array)
|
||||
DICTIONARY_READ(in, &l, 4);
|
||||
SWAP32(l);
|
||||
uint16_t* temp = (uint16_t*) uprv_malloc(sizeof(uint16_t)*l);
|
||||
DICTIONARY_READ(in, temp, l * sizeof (int16_t) );
|
||||
for (i = 0; i < l; i++) {
|
||||
SWAP16(temp[i]);
|
||||
}
|
||||
DICTIONARY_READ(in, &l, 4);
|
||||
SWAP32(l);
|
||||
int8_t* temp2 = (int8_t*) uprv_malloc(sizeof(int8_t)*l);
|
||||
DICTIONARY_READ(in, temp2, l);
|
||||
columnMap = ucmp8_openAdopt(temp, temp2, l);
|
||||
|
||||
// read in numCols and numColGroups
|
||||
DICTIONARY_READ(in, &numCols, 4);
|
||||
SWAP32(numCols);
|
||||
DICTIONARY_READ(in, &numColGroups, 4);
|
||||
SWAP32(numColGroups);
|
||||
|
||||
// read in the row-number index
|
||||
DICTIONARY_READ(in, &l, 4);
|
||||
SWAP32(l);
|
||||
rowIndex = (int16_t *)uprv_malloc(l*2);
|
||||
DICTIONARY_READ(in, rowIndex, l * sizeof (int16_t) );
|
||||
for (i = 0; i < l; i++) {
|
||||
SWAP16(rowIndex[i]);
|
||||
}
|
||||
|
||||
// load in the populated-cells bitmap: index first, then bitmap list
|
||||
DICTIONARY_READ(in, &l, 4);
|
||||
SWAP32(l);
|
||||
rowIndexFlagsIndex = (int16_t *)uprv_malloc(l*2);
|
||||
DICTIONARY_READ(in, rowIndexFlagsIndex, l * sizeof(int16_t) );
|
||||
for (i = 0; i < l; i++) {
|
||||
SWAP16(rowIndexFlagsIndex[i]);
|
||||
}
|
||||
DICTIONARY_READ(in, &l, 4);
|
||||
SWAP32(l);
|
||||
rowIndexFlags = (int32_t *)uprv_malloc(l*4);
|
||||
DICTIONARY_READ(in, rowIndexFlags, l * sizeof(int32_t));
|
||||
for (i = 0; i < l; i++) {
|
||||
SWAP32(rowIndexFlags[i]);
|
||||
}
|
||||
|
||||
// load in the row-shift index
|
||||
DICTIONARY_READ(in, &l, 4);
|
||||
SWAP32(l);
|
||||
rowIndexShifts = (int8_t *)uprv_malloc(l);
|
||||
DICTIONARY_READ(in, rowIndexShifts, l);
|
||||
|
||||
// finally, load in the actual state table
|
||||
DICTIONARY_READ(in, &l, 4);
|
||||
SWAP32(l);
|
||||
table = (int16_t *)uprv_malloc(l*2);
|
||||
DICTIONARY_READ(in, table, l * sizeof(int16_t) );
|
||||
for (i = 0; i < l; i++) {
|
||||
SWAP16(table[i]);
|
||||
}
|
||||
|
||||
// the reverse column map occurs next in the file. In the C/C++ code, for the
|
||||
// time being, we're not going to worry about that.
|
||||
}
|
||||
|
||||
//=================================================================================
|
||||
// access to the words
|
||||
//=================================================================================
|
||||
|
||||
/**
|
||||
* Uses the column map to map the character to a column number, then
|
||||
* passes the row and column number to the other version of at()
|
||||
* @param row The current state
|
||||
* @param ch The character whose column we're interested in
|
||||
* @return The new state to transition to
|
||||
*/
|
||||
int16_t
|
||||
BreakDictionary::at(int32_t row, UChar ch) const
|
||||
{
|
||||
int16_t col = ucmp8_get(columnMap, ch);
|
||||
return at(row, (int32_t)col);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the value in the cell with the specified (logical) row and
|
||||
* column numbers. In DictionaryBasedBreakIterator, the row number is
|
||||
* a state number, the column number is an input, and the return value
|
||||
* is the row number of the new state to transition to. (0 is the
|
||||
* "error" state, and -1 is the "end of word" state in a dictionary)
|
||||
* @param row The row number of the current state
|
||||
* @param col The column number of the input character (0 means "not a
|
||||
* dictionary character")
|
||||
* @return The row number of the new state to transition to
|
||||
*/
|
||||
int16_t
|
||||
BreakDictionary::at(int32_t row, int32_t col) const
|
||||
{
|
||||
if (cellIsPopulated(row, col)) {
|
||||
// we map from logical to physical row number by looking up the
|
||||
// mapping in rowIndex; we map from logical column number to
|
||||
// physical column number by looking up a shift value for this
|
||||
// logical row and offsetting the logical column number by
|
||||
// the shift amount. Then we can use internalAt() to actually
|
||||
// get the value out of the table.
|
||||
return internalAt(rowIndex[row], col + rowIndexShifts[row]);
|
||||
}
|
||||
else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
//=================================================================================
|
||||
// implementation
|
||||
//=================================================================================
|
||||
/**
|
||||
* Given (logical) row and column numbers, returns true if the
|
||||
* cell in that position is populated
|
||||
*/
|
||||
UBool
|
||||
BreakDictionary::cellIsPopulated(int32_t row, int32_t col) const
|
||||
{
|
||||
// look up the entry in the bitmap index for the specified row.
|
||||
// If it's a negative number, it's the column number of the only
|
||||
// populated cell in the row
|
||||
if (rowIndexFlagsIndex[row] < 0) {
|
||||
return col == -rowIndexFlagsIndex[row];
|
||||
}
|
||||
|
||||
// if it's a positive number, it's the offset of an entry in the bitmap
|
||||
// list. If the table is more than 32 columns wide, the bitmap is stored
|
||||
// successive entries in the bitmap list, so we have to divide the column
|
||||
// number by 32 and offset the number we got out of the index by the result.
|
||||
// Once we have the appropriate piece of the bitmap, test the appropriate
|
||||
// bit and return the result.
|
||||
else {
|
||||
int32_t flags = rowIndexFlags[rowIndexFlagsIndex[row] + (col >> 5)];
|
||||
return (flags & (1 << (col & 0x1f))) != 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Implementation of at() when we know the specified cell is populated.
|
||||
* @param row The PHYSICAL row number of the cell
|
||||
* @param col The PHYSICAL column number of the cell
|
||||
* @return The value stored in the cell
|
||||
*/
|
||||
int16_t
|
||||
BreakDictionary::internalAt(int32_t row, int32_t col) const
|
||||
{
|
||||
// the table is a one-dimensional array, so this just does the math necessary
|
||||
// to treat it as a two-dimensional array (we don't just use a two-dimensional
|
||||
// array because two-dimensional arrays are inefficient in Java)
|
||||
return table[row * numCols + col];
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
|
@ -1,174 +0,0 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2004 IBM and others. All rights reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 12/1/99 rtg Ported from Java
|
||||
* 01/13/2000 helena Added UErrorCode to ctors.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef BRKDICT_H
|
||||
#define BRKDICT_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "ucmp8.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* This is the class that represents the list of known words used by
|
||||
* DictionaryBasedBreakIterator. The conceptual data structure used
|
||||
* here is a trie: there is a node hanging off the root node for every
|
||||
* letter that can start a word. Each of these nodes has a node hanging
|
||||
* off of it for every letter that can be the second letter of a word
|
||||
* if this node is the first letter, and so on. The trie is represented
|
||||
* as a two-dimensional array that can be treated as a table of state
|
||||
* transitions. Indexes are used to compress this array, taking
|
||||
* advantage of the fact that this array will always be very sparse.
|
||||
*/
|
||||
class BreakDictionary : public UMemory {
|
||||
//=================================================================================
|
||||
// data members
|
||||
//=================================================================================
|
||||
private:
|
||||
|
||||
/**
|
||||
* Maps from characters to column numbers. The main use of this is to
|
||||
* avoid making room in the array for empty columns.
|
||||
*/
|
||||
CompactByteArray* columnMap;
|
||||
|
||||
/**
|
||||
* The number of actual columns in the table
|
||||
*/
|
||||
int32_t numCols;
|
||||
|
||||
/**
|
||||
* Columns are organized into groups of 32. This says how many
|
||||
* column groups. (We could calculate this, but we store the
|
||||
* value to avoid having to repeatedly calculate it.)
|
||||
*/
|
||||
int32_t numColGroups;
|
||||
|
||||
/**
|
||||
* The actual compressed state table. Each conceptual row represents
|
||||
* a state, and the cells in it contain the row numbers of the states
|
||||
* to transition to for each possible letter. 0 is used to indicate
|
||||
* an illegal combination of letters (i.e., the error state). The
|
||||
* table is compressed by eliminating all the unpopulated (i.e., zero)
|
||||
* cells. Multiple conceptual rows can then be doubled up in a single
|
||||
* physical row by sliding them up and possibly shifting them to one
|
||||
* side or the other so the populated cells don't collide. Indexes
|
||||
* are used to identify unpopulated cells and to locate populated cells.
|
||||
*/
|
||||
int16_t* table;
|
||||
|
||||
/**
|
||||
* This index maps logical row numbers to physical row numbers
|
||||
*/
|
||||
int16_t* rowIndex;
|
||||
|
||||
/**
|
||||
* A bitmap is used to tell which cells in the conceptual table are
|
||||
* populated. This array contains all the unique bit combinations
|
||||
* in that bitmap. If the table is more than 32 columns wide,
|
||||
* successive entries in this array are used for a single row.
|
||||
*/
|
||||
int32_t* rowIndexFlags;
|
||||
|
||||
/**
|
||||
* This index maps from a logical row number into the bitmap table above.
|
||||
* (This keeps us from storing duplicate bitmap combinations.) Since there
|
||||
* are a lot of rows with only one populated cell, instead of wasting space
|
||||
* in the bitmap table, we just store a negative number in this index for
|
||||
* rows with one populated cell. The absolute value of that number is
|
||||
* the column number of the populated cell.
|
||||
*/
|
||||
int16_t* rowIndexFlagsIndex;
|
||||
|
||||
/**
|
||||
* For each logical row, this index contains a constant that is added to
|
||||
* the logical column number to get the physical column number
|
||||
*/
|
||||
int8_t* rowIndexShifts;
|
||||
|
||||
//=================================================================================
|
||||
// deserialization
|
||||
//=================================================================================
|
||||
|
||||
public:
|
||||
/**
|
||||
* Constructor. Creates the BreakDictionary by using readDictionaryFile() to
|
||||
* load the dictionary tables from the disk.
|
||||
* @param dictionaryFilename The name of the dictionary file
|
||||
* @param status for errors if it occurs
|
||||
*/
|
||||
BreakDictionary(const char* dictionaryFilename, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
~BreakDictionary();
|
||||
|
||||
/**
|
||||
* Reads the dictionary file on the disk and constructs the appropriate in-memory
|
||||
* representation.
|
||||
* @param in The given memory stream
|
||||
*/
|
||||
void readDictionaryFile(const uint8_t * in);
|
||||
|
||||
//=================================================================================
|
||||
// access to the words
|
||||
//=================================================================================
|
||||
|
||||
/**
|
||||
* Uses the column map to map the character to a column number, then
|
||||
* passes the row and column number to the other version of at()
|
||||
* @param row The current state
|
||||
* @param ch The character whose column we're interested in
|
||||
* @return The new state to transition to
|
||||
*/
|
||||
int16_t at(int32_t row, UChar ch) const;
|
||||
|
||||
/**
|
||||
* Returns the value in the cell with the specified (logical) row and
|
||||
* column numbers. In DictionaryBasedBreakIterator, the row number is
|
||||
* a state number, the column number is an input, and the return value
|
||||
* is the row number of the new state to transition to. (0 is the
|
||||
* "error" state, and -1 is the "end of word" state in a dictionary)
|
||||
* @param row The row number of the current state
|
||||
* @param col The column number of the input character (0 means "not a
|
||||
* dictionary character")
|
||||
* @return The row number of the new state to transition to
|
||||
*/
|
||||
int16_t at(int32_t row, int32_t col) const;
|
||||
|
||||
private:
|
||||
/**
|
||||
* Given (logical) row and column numbers, returns true if the
|
||||
* cell in that position is populated
|
||||
* @param row The LOGICAL row number of the cell
|
||||
* @param col The LOGICAL column number of the cell
|
||||
* @return true if the cell in that position is populated
|
||||
*/
|
||||
UBool cellIsPopulated(int32_t row, int32_t col) const;
|
||||
|
||||
/**
|
||||
* Implementation of at() when we know the specified cell is populated.
|
||||
* @param row The PHYSICAL row number of the cell
|
||||
* @param col The PHYSICAL column number of the cell
|
||||
* @return The value stored in the cell
|
||||
*/
|
||||
int16_t internalAt(int32_t row, int32_t col) const;
|
||||
|
||||
// the following methods are never meant to be called and so are not defined
|
||||
// (if you don't declare them, you get default implementations)
|
||||
BreakDictionary(const BreakDictionary& that);
|
||||
BreakDictionary& operator=(const BreakDictionary& that);
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
|
@ -209,12 +209,6 @@
|
|||
<Filter
|
||||
Name="break iteration"
|
||||
Filter="">
|
||||
<File
|
||||
RelativePath=".\brkdict.cpp">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\brkdict.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\brkeng.cpp">
|
||||
</File>
|
||||
|
@ -417,12 +411,6 @@
|
|||
<File
|
||||
RelativePath=".\uarrsort.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\ucmp8.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\ucmp8.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\uenum.c">
|
||||
</File>
|
||||
|
|
|
@ -1,572 +0,0 @@
|
|||
/*
|
||||
********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2004, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************
|
||||
*/
|
||||
|
||||
#include "ucmp8.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
/* internal constants*/
|
||||
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucmp8_getkUnicodeCount() { return UCMP8_kUnicodeCount;}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucmp8_getkBlockCount() { return UCMP8_kBlockCount;}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucmp8_initBogus(CompactByteArray* array)
|
||||
{
|
||||
CompactByteArray* this_obj = array;
|
||||
|
||||
if (this_obj == NULL) return;
|
||||
|
||||
this_obj->fStructSize = sizeof(CompactByteArray);
|
||||
this_obj->fArray = NULL;
|
||||
this_obj->fIndex = NULL;
|
||||
this_obj->fCount = UCMP8_kUnicodeCount;
|
||||
this_obj->fCompact = FALSE;
|
||||
this_obj->fBogus = TRUE;
|
||||
this_obj->fAlias = FALSE;
|
||||
this_obj->fIAmOwned = TRUE;
|
||||
}
|
||||
|
||||
/* debug flags*/
|
||||
/*=======================================================*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ucmp8_init(CompactByteArray* array, int8_t defaultValue)
|
||||
{
|
||||
/* set up the index array and the data array.
|
||||
* the index array always points into particular parts of the data array
|
||||
* it is initially set up to point at regular block boundaries
|
||||
* The following example uses blocks of 4 for simplicity
|
||||
* Example: Expanded
|
||||
* INDEX# 0 1 2 3 4
|
||||
* INDEX 0 4 8 12 16 ...
|
||||
* ARRAY abcdeababcedzyabcdea...
|
||||
* | | | | | |...
|
||||
* whenever you set an element in the array, it unpacks to this state
|
||||
* After compression, the index will point to various places in the data array
|
||||
* wherever there is a runs of the same elements as in the original
|
||||
* Example: Compressed
|
||||
* INDEX# 0 1 2 3 4
|
||||
* INDEX 0 4 1 8 2 ...
|
||||
* ARRAY abcdeabazyabc...
|
||||
* If you look at the example, index# 2 in the expanded version points
|
||||
* to data position number 8, which has elements "bced". In the compressed
|
||||
* version, index# 2 points to data position 1, which also has "bced"
|
||||
*/
|
||||
CompactByteArray* this_obj = array;
|
||||
int32_t i;
|
||||
|
||||
if (this_obj == NULL) return;
|
||||
|
||||
this_obj->fStructSize = sizeof(CompactByteArray);
|
||||
this_obj->fArray = NULL;
|
||||
this_obj->fIndex = NULL;
|
||||
this_obj->fCount = UCMP8_kUnicodeCount;
|
||||
this_obj->fCompact = FALSE;
|
||||
this_obj->fBogus = FALSE;
|
||||
this_obj->fAlias = FALSE;
|
||||
this_obj->fIAmOwned = TRUE;
|
||||
|
||||
|
||||
this_obj->fArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
|
||||
if (!this_obj->fArray)
|
||||
{
|
||||
this_obj->fBogus = TRUE;
|
||||
return;
|
||||
}
|
||||
this_obj->fIndex = (uint16_t*) uprv_malloc(sizeof(uint16_t) * UCMP8_kIndexCount);
|
||||
if (!this_obj->fIndex)
|
||||
{
|
||||
uprv_free(this_obj->fArray);
|
||||
this_obj->fArray = NULL;
|
||||
this_obj->fBogus = TRUE;
|
||||
return;
|
||||
}
|
||||
for (i = 0; i < UCMP8_kUnicodeCount; ++i)
|
||||
{
|
||||
this_obj->fArray[i] = defaultValue;
|
||||
}
|
||||
for (i = 0; i < UCMP8_kIndexCount; ++i)
|
||||
{
|
||||
this_obj->fIndex[i] = (uint16_t)(i << UCMP8_kBlockShift);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Allocates a new CompactByteArray on the heap and initializes it in expanded
 * form with every entry set to defaultValue (see ucmp8_init for the layout).
 *
 * Returns NULL if any allocation fails; nothing is leaked in that case.
 * The returned object has fIAmOwned == FALSE, so ucmp8_close() frees the
 * struct itself as well as its arrays.
 */
U_CAPI CompactByteArray* U_EXPORT2
ucmp8_open(int8_t defaultValue)
{
    CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));

    if (this_obj == NULL) {
        return NULL;
    }

    /* Share the field setup and array allocation with the in-place initializer. */
    ucmp8_init(this_obj, defaultValue);
    if (this_obj->fBogus) {
        /* ucmp8_init has already released any partially allocated array;
         * release the struct too instead of leaking it. */
        uprv_free(this_obj);
        return NULL;
    }

    /* heap-allocated struct: ucmp8_close() must free it */
    this_obj->fIAmOwned = FALSE;
    return this_obj;
}
|
||||
|
||||
/*
 * Allocates a CompactByteArray on the heap and initializes it with
 * ucmp8_initAdopt() from the supplied index and value arrays.
 * Returns NULL if the struct cannot be allocated.
 */
U_CAPI CompactByteArray* U_EXPORT2
ucmp8_openAdopt(uint16_t *indexArray,
                int8_t *newValues,
                int32_t count)
{
    CompactByteArray* created;

    created = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
    if (created != NULL) {
        ucmp8_initAdopt(created, indexArray, newValues, count);
        /* heap-allocated struct: ucmp8_close() must free it */
        created->fIAmOwned = FALSE;
    }
    return created;
}
|
||||
|
||||
/*
 * Allocates a CompactByteArray on the heap and initializes it with
 * ucmp8_initAlias() from the supplied index and value arrays.
 * Returns NULL if the struct cannot be allocated.
 */
U_CAPI CompactByteArray* U_EXPORT2
ucmp8_openAlias(uint16_t *indexArray,
                int8_t *newValues,
                int32_t count)
{
    CompactByteArray* created;

    created = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
    if (created != NULL) {
        ucmp8_initAlias(created, indexArray, newValues, count);
        /* heap-allocated struct: ucmp8_close() must free it */
        created->fIAmOwned = FALSE;
    }
    return created;
}
|
||||
|
||||
/*=======================================================*/
|
||||
|
||||
/*
 * Initializes a caller-provided struct around existing index and value
 * arrays.  fAlias is cleared, so ucmp8_close() will free both arrays.
 * The array is marked compact when count is below UCMP8_kUnicodeCount.
 * Returns this_obj (NULL in, NULL out).
 */
U_CAPI CompactByteArray* U_EXPORT2
ucmp8_initAdopt(CompactByteArray *this_obj,
                uint16_t *indexArray,
                int8_t *newValues,
                int32_t count)
{
    if (this_obj == NULL) {
        return NULL;
    }

    this_obj->fStructSize = sizeof(CompactByteArray);
    this_obj->fIndex = indexArray;
    this_obj->fArray = newValues;
    this_obj->fCount = count;
    this_obj->fCompact = (UBool)(count < UCMP8_kUnicodeCount);
    this_obj->fBogus = FALSE;
    this_obj->fAlias = FALSE;
    this_obj->fIAmOwned = TRUE;

    return this_obj;
}
|
||||
|
||||
/*
 * Initializes a caller-provided struct around existing index and value
 * arrays.  fAlias is set, so ucmp8_close() will not free the arrays.
 * The array is marked compact when count is below UCMP8_kUnicodeCount.
 * Returns this_obj (NULL in, NULL out).
 */
U_CAPI CompactByteArray* U_EXPORT2
ucmp8_initAlias(CompactByteArray *this_obj,
                uint16_t *indexArray,
                int8_t *newValues,
                int32_t count)
{
    if (this_obj == NULL) {
        return NULL;
    }

    this_obj->fStructSize = sizeof(CompactByteArray);
    this_obj->fIndex = indexArray;
    this_obj->fArray = newValues;
    this_obj->fCount = count;
    this_obj->fCompact = (UBool)(count < UCMP8_kUnicodeCount);
    this_obj->fBogus = FALSE;
    this_obj->fAlias = TRUE;
    this_obj->fIAmOwned = TRUE;

    return this_obj;
}
|
||||
|
||||
/*=======================================================*/
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucmp8_close(CompactByteArray* this_obj)
|
||||
{
|
||||
if(this_obj != NULL) {
|
||||
if(!this_obj->fAlias) {
|
||||
if(this_obj->fArray != NULL) {
|
||||
uprv_free(this_obj->fArray);
|
||||
}
|
||||
if(this_obj->fIndex != NULL) {
|
||||
uprv_free(this_obj->fIndex);
|
||||
}
|
||||
}
|
||||
if(!this_obj->fIAmOwned) /* Called if 'init' was called instead of 'open'. */
|
||||
{
|
||||
uprv_free(this_obj);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*=======================================================*/
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucmp8_expand(CompactByteArray* this_obj)
|
||||
{
|
||||
/* can optimize later.
|
||||
* if we have to expand, then walk through the blocks instead of using Get
|
||||
* this code unpacks the array by copying the blocks to the normalized position.
|
||||
* Example: Compressed
|
||||
* INDEX# 0 1 2 3 4
|
||||
* INDEX 0 4 1 8 2 ...
|
||||
* ARRAY abcdeabazyabc...
|
||||
* turns into
|
||||
* Example: Expanded
|
||||
* INDEX# 0 1 2 3 4
|
||||
* INDEX 0 4 8 12 16 ...
|
||||
* ARRAY abcdeababcedzyabcdea...
|
||||
*/
|
||||
int32_t i;
|
||||
if (this_obj->fCompact)
|
||||
{
|
||||
int8_t* tempArray;
|
||||
tempArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
|
||||
if (!tempArray)
|
||||
{
|
||||
this_obj->fBogus = TRUE;
|
||||
return;
|
||||
}
|
||||
for (i = 0; i < UCMP8_kUnicodeCount; ++i)
|
||||
{
|
||||
tempArray[i] = ucmp8_get(this_obj,(UChar)i); /* HSYS : How expand?*/
|
||||
}
|
||||
for (i = 0; i < UCMP8_kIndexCount; ++i)
|
||||
{
|
||||
this_obj->fIndex[i] = (uint16_t)(i<< UCMP8_kBlockShift);
|
||||
}
|
||||
uprv_free(this_obj->fArray);
|
||||
this_obj->fArray = tempArray;
|
||||
this_obj->fCompact = FALSE;
|
||||
this_obj->fAlias = FALSE;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*=======================================================*/
|
||||
/* this_obj->fArray: an array to be overlapped
|
||||
* start and count: specify the block to be overlapped
|
||||
* tempIndex: the overlapped array (actually indices back into inputContents)
|
||||
* inputHash: an index of hashes for tempIndex, where
|
||||
* inputHash[i] = XOR of values from i-count+1 to i
|
||||
*/
|
||||
static int32_t
|
||||
findOverlappingPosition(CompactByteArray* this_obj,
|
||||
uint32_t start,
|
||||
const UChar* tempIndex,
|
||||
int32_t tempIndexCount,
|
||||
uint32_t cycle)
|
||||
{
|
||||
/* this_obj is a utility routine for finding blocks that overlap.
|
||||
* IMPORTANT: the cycle number is very important. Small cycles take a lot
|
||||
* longer to work. In some cases, they may be able to get better compaction.
|
||||
*/
|
||||
|
||||
int32_t i;
|
||||
int32_t j;
|
||||
int32_t currentCount;
|
||||
|
||||
for (i = 0; i < tempIndexCount; i += cycle)
|
||||
{
|
||||
currentCount = UCMP8_kBlockCount;
|
||||
if (i + UCMP8_kBlockCount > tempIndexCount)
|
||||
{
|
||||
currentCount = tempIndexCount - i;
|
||||
}
|
||||
for (j = 0; j < currentCount; ++j)
|
||||
{
|
||||
if (this_obj->fArray[start + j] != this_obj->fArray[tempIndex[i + j]])
|
||||
break;
|
||||
}
|
||||
if (j == currentCount)
|
||||
break;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
ucmp8_isBogus(const CompactByteArray* this_obj)
|
||||
{
|
||||
return (UBool)(this_obj == NULL || this_obj->fBogus);
|
||||
}
|
||||
|
||||
U_CAPI const int8_t* U_EXPORT2
|
||||
ucmp8_getArray(const CompactByteArray* this_obj)
|
||||
{
|
||||
return this_obj->fArray;
|
||||
}
|
||||
|
||||
U_CAPI const uint16_t* U_EXPORT2
|
||||
ucmp8_getIndex(const CompactByteArray* this_obj)
|
||||
{
|
||||
return this_obj->fIndex;
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucmp8_getCount(const CompactByteArray* this_obj)
|
||||
{
|
||||
return this_obj->fCount;
|
||||
}
|
||||
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucmp8_set(CompactByteArray* this_obj,
|
||||
UChar c,
|
||||
int8_t value)
|
||||
{
|
||||
if (this_obj->fCompact == TRUE)
|
||||
{
|
||||
ucmp8_expand(this_obj);
|
||||
if (this_obj->fBogus) return;
|
||||
}
|
||||
this_obj->fArray[(int32_t)c] = value;
|
||||
}
|
||||
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucmp8_setRange(CompactByteArray* this_obj,
|
||||
UChar start,
|
||||
UChar end,
|
||||
int8_t value)
|
||||
{
|
||||
int32_t i;
|
||||
if (this_obj->fCompact == TRUE)
|
||||
{
|
||||
ucmp8_expand(this_obj);
|
||||
if (this_obj->fBogus)
|
||||
return;
|
||||
}
|
||||
for (i = start; i <= end; ++i)
|
||||
{
|
||||
this_obj->fArray[i] = value;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*=======================================================*/
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucmp8_compact(CompactByteArray* this_obj,
|
||||
uint32_t cycle)
|
||||
{
|
||||
if (!this_obj->fCompact)
|
||||
{
|
||||
/* this_obj actually does the compaction.
|
||||
* it walks throught the contents of the expanded array, finding the
|
||||
* first block in the data that matches the contents of the current index.
|
||||
* As it works, it keeps an updated pointer to the last position,
|
||||
* so that it knows how big to make the final array
|
||||
* If the matching succeeds, then the index will point into the data
|
||||
* at some earlier position.
|
||||
* If the matching fails, then last position pointer will be bumped,
|
||||
* and the index will point to that last block of data.
|
||||
*/
|
||||
UChar* tempIndex;
|
||||
int32_t tempIndexCount;
|
||||
int8_t* tempArray;
|
||||
int32_t iBlock, iIndex;
|
||||
|
||||
/* fix cycle, must be 0 < cycle <= blockcount*/
|
||||
if (cycle <= 0)
|
||||
cycle = 1;
|
||||
else if (cycle > (uint32_t)UCMP8_kBlockCount)
|
||||
cycle = UCMP8_kBlockCount;
|
||||
|
||||
/* make temp storage, larger than we need*/
|
||||
tempIndex = (UChar*) uprv_malloc(sizeof(UChar)* UCMP8_kUnicodeCount);
|
||||
if (!tempIndex)
|
||||
{
|
||||
this_obj->fBogus = TRUE;
|
||||
return;
|
||||
}
|
||||
/* set up first block.*/
|
||||
tempIndexCount = UCMP8_kBlockCount;
|
||||
for (iIndex = 0; iIndex < UCMP8_kBlockCount; ++iIndex)
|
||||
{
|
||||
tempIndex[iIndex] = (uint16_t)iIndex;
|
||||
} /* endfor (iIndex = 0; .....)*/
|
||||
this_obj->fIndex[0] = 0;
|
||||
|
||||
/* for each successive block, find out its first position in the compacted array*/
|
||||
for (iBlock = 1; iBlock < UCMP8_kIndexCount; ++iBlock)
|
||||
{
|
||||
int32_t newCount, firstPosition, block;
|
||||
block = iBlock << UCMP8_kBlockShift;
|
||||
/* if (debugSmall) if (block > debugSmallLimit) break;*/
|
||||
firstPosition = findOverlappingPosition(this_obj,
|
||||
block,
|
||||
tempIndex,
|
||||
tempIndexCount,
|
||||
cycle);
|
||||
|
||||
/* if not contained in the current list, copy the remainder
|
||||
* invariant; cumulativeHash[iBlock] = XOR of values from iBlock-kBlockCount+1 to iBlock
|
||||
* we do this_obj by XORing out cumulativeHash[iBlock-kBlockCount]
|
||||
*/
|
||||
newCount = firstPosition + UCMP8_kBlockCount;
|
||||
if (newCount > tempIndexCount)
|
||||
{
|
||||
for (iIndex = tempIndexCount; iIndex < newCount; ++iIndex)
|
||||
{
|
||||
tempIndex[iIndex] = (uint16_t)(iIndex - firstPosition + block);
|
||||
} /* endfor (iIndex = tempIndexCount....)*/
|
||||
tempIndexCount = newCount;
|
||||
} /* endif (newCount > tempIndexCount)*/
|
||||
this_obj->fIndex[iBlock] = (uint16_t)firstPosition;
|
||||
} /* endfor (iBlock = 1.....)*/
|
||||
|
||||
/* now allocate and copy the items into the array*/
|
||||
tempArray = (int8_t*) uprv_malloc(tempIndexCount * sizeof(int8_t));
|
||||
if (!tempArray)
|
||||
{
|
||||
this_obj->fBogus = TRUE;
|
||||
uprv_free(tempIndex);
|
||||
return;
|
||||
}
|
||||
for (iIndex = 0; iIndex < tempIndexCount; ++iIndex)
|
||||
{
|
||||
tempArray[iIndex] = this_obj->fArray[tempIndex[iIndex]];
|
||||
}
|
||||
uprv_free(this_obj->fArray);
|
||||
this_obj->fArray = tempArray;
|
||||
this_obj->fCount = tempIndexCount;
|
||||
|
||||
|
||||
/* free up temp storage*/
|
||||
uprv_free(tempIndex);
|
||||
this_obj->fCompact = TRUE;
|
||||
} /* endif (!this_obj->fCompact)*/
|
||||
}
|
||||
|
||||
/* Append len bytes from source at destAddr + sizeSoFar and advance sizeSoFar
 * by len. When destAddr is NULL nothing is copied but sizeSoFar still
 * advances, which lets callers pre-flight the required size.
 * Wrapped in do { } while (0) so that the macro behaves as one statement
 * (e.g. as the body of an unbraced if). len is evaluated twice.
 */
#define MEMORY_WRITE(destAddr, source, sizeSoFar, len) \
    do { \
        if (destAddr) { \
            uprv_memcpy((destAddr) + (sizeSoFar), (source), (len)); \
        } \
        (sizeSoFar) += (len); \
    } while (0)
|
||||
|
||||
/* Serialize array into MS as: 4-byte format version, 4-byte fCount, the
 * UCMP8_kIndexCount index entries, the fCount values, then zero bytes up to
 * a multiple of 4. The fields are copied as raw bytes. With MS == NULL
 * nothing is written and only the byte count is computed.
 * Returns the total number of bytes (written or required).
 */
U_CAPI uint32_t U_EXPORT2 ucmp8_flattenMem (const CompactByteArray* array, uint8_t *MS)
{
    static const int32_t version = ICU_UCMP8_VERSION;
    static const uint8_t zeroPad = 0;
    int32_t written = 0;

    /* header: version, then entry count */
    MEMORY_WRITE(MS, &version, written, 4);
    MEMORY_WRITE(MS, &array->fCount, written, 4);

    /* payload: block index followed by the values */
    MEMORY_WRITE(MS, array->fIndex, written, sizeof(array->fIndex[0])*UCMP8_kIndexCount);
    MEMORY_WRITE(MS, array->fArray, written, sizeof(array->fArray[0])*array->fCount);

    /* end padding */
    for (; written % 4 != 0; )
    {
        MEMORY_WRITE(MS, &zeroPad, written, 1);
    }

    return written;
}
|
||||
|
||||
/* We use sizeof(*array), etc so that this code can be as portable as
|
||||
possible between the ucmpX_ family.
|
||||
*/
|
||||
|
||||
/**
 * Initialize this_obj as a compact array that aliases a flattened image
 * laid out as ucmp8_flattenMem writes it.
 *
 * fIndex and fArray are pointed into the caller's buffer, not copied, so the
 * buffer must outlive the array. On success *source is advanced past the
 * image, including its padding to a multiple of 4 bytes.
 *
 * Does nothing if *status already indicates a failure. If the version word
 * does not match ICU_UCMP8_VERSION, or the stored count is negative, *status
 * is set to U_INVALID_FORMAT_ERROR, the array is flagged bogus with NULL
 * arrays and a zero count, and *source is left just past the last header
 * word that was read.
 *
 * @param this_obj the array to initialize
 * @param source   in/out cursor into the flattened data
 * @param status   in/out error code
 */
U_CAPI void U_EXPORT2 ucmp8_initFromData(CompactByteArray *this_obj, const uint8_t **source, UErrorCode *status)
{
    uint32_t version = 0;
    int32_t count = 0;
    const uint8_t *oldSource;

    if(U_FAILURE(*status))
        return;

    oldSource = *source;

    this_obj->fArray = NULL;
    this_obj->fIndex = NULL;
    this_obj->fCount = 0; /* defined value on every exit path */
    this_obj->fBogus = FALSE;
    this_obj->fStructSize = sizeof(CompactByteArray);
    this_obj->fCompact = TRUE;
    this_obj->fAlias = TRUE;
    this_obj->fIAmOwned = TRUE;

    /* Copy the header words out byte-wise: the buffer is only known to be a
     * byte pointer, so reading it through a uint32_t* could be misaligned.
     */
    uprv_memcpy(&version, *source, sizeof(version));
    (*source) += 4;

    if(version != ICU_UCMP8_VERSION)
    {
        this_obj->fBogus = TRUE;
        *status = U_INVALID_FORMAT_ERROR;
        return;
    }

    uprv_memcpy(&count, *source, sizeof(count));
    (*source) += 4;

    /* A negative count would move the cursor backwards below. */
    if(count < 0)
    {
        this_obj->fBogus = TRUE;
        *status = U_INVALID_FORMAT_ERROR;
        return;
    }
    this_obj->fCount = count;

    this_obj->fIndex = (uint16_t*) *source;
    (*source) += sizeof(this_obj->fIndex[0])*UCMP8_kIndexCount;

    this_obj->fArray = (int8_t*) *source;
    (*source) += sizeof(this_obj->fArray[0])*this_obj->fCount;

    /* eat up padding */
    while((*source-(oldSource))%4)
        (*source)++;
}
|
|
@ -1,244 +0,0 @@
|
|||
/*
|
||||
********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1996-2004, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#ifndef UCMP8_H
|
||||
#define UCMP8_H
|
||||
|
||||
/* 32-bits.
|
||||
Bump this whenever the internal structure changes.
|
||||
*/
|
||||
#define ICU_UCMP8_VERSION 0x01260000
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/*====================================
|
||||
* class CompactByteArray
|
||||
* Provides a compact way to store information that is indexed by Unicode values,
|
||||
* such as character properties, types, keyboard values, etc.
|
||||
* The ATypes are used by value, so should be small, integers or pointers.
|
||||
*====================================
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2 ucmp8_getkUnicodeCount(void);
|
||||
U_CAPI int32_t U_EXPORT2 ucmp8_getkBlockCount(void);
|
||||
|
||||
/* Two-level lookup table from a 16-bit character to an int8_t value: fIndex
 * holds one starting offset per block of UCMP8_kBlockCount characters, and
 * fArray holds the values (see ucmp8_get).
 */
typedef struct CompactByteArray {
|
||||
uint32_t fStructSize; /* sizeof(CompactByteArray), as set by ucmp8_initFromData */
|
||||
int8_t* fArray; /* the values; addressed through fIndex */
|
||||
uint16_t* fIndex; /* per-block starting offset into fArray */
|
||||
int32_t fCount; /* number of entries in fArray (ucmp8_flattenMem writes this many) */
|
||||
UBool fCompact; /* TRUE while blocks may share storage; ucmp8_set/ucmp8_setRange expand first */
|
||||
UBool fBogus; /* set when an allocation inside expand/compact fails */
|
||||
UBool fAlias; /* set by ucmp8_initFromData, where fArray/fIndex point into the caller's buffer; cleared by ucmp8_expand */
|
||||
UBool fIAmOwned; /* when TRUE, ucmp8_close() does not free the struct itself */
|
||||
} CompactByteArray;
|
||||
|
||||
#define UCMP8_kUnicodeCount 65536
|
||||
#define UCMP8_kBlockShift 7
|
||||
#define UCMP8_kBlockCount (1<<UCMP8_kBlockShift)
|
||||
#define UCMP8_kIndexShift (16-UCMP8_kBlockShift)
|
||||
#define UCMP8_kIndexCount (1<<UCMP8_kIndexShift)
|
||||
#define UCMP8_kBlockMask (UCMP8_kBlockCount-1)
|
||||
|
||||
|
||||
/**
|
||||
* Construct an empty CompactByteArray with uprv_malloc(). Do not call any of the
|
||||
* ucmp8_init*() functions after using this function. They will cause a memory
|
||||
* leak.
|
||||
*
|
||||
* @param defaultValue the default value for all characters not explicitly in the array
|
||||
* @see ucmp8_init
|
||||
* @see ucmp8_initBogus
|
||||
* @return The initialized array.
|
||||
*/
|
||||
U_CAPI CompactByteArray* U_EXPORT2 ucmp8_open(int8_t defaultValue);
|
||||
|
||||
/**
|
||||
* Construct a CompactByteArray from a pre-computed index and values array. The values
|
||||
* will be adopted by the CompactByteArray. Memory is allocated with uprv_malloc.
|
||||
* Note: for speed, the compact method will only re-use blocks in the values array
|
||||
* that are on a block boundary. The pre-computed arrays passed in to this constructor
|
||||
* may re-use blocks at any position in the values array. The indexArray and newValues
|
||||
* will be uprv_free'd when ucmp8_close() is called.
|
||||
*
|
||||
* @param indexArray the index array to be adopted
|
||||
* @param newValues the value array to be adopted
|
||||
* @param count the number of entries in the value array
|
||||
* @return the newly constructed CompactByteArray
|
||||
* @see compact
|
||||
*/
|
||||
U_CAPI CompactByteArray* U_EXPORT2 ucmp8_openAdopt(uint16_t* indexArray,
|
||||
int8_t* newValues,
|
||||
int32_t count);
|
||||
|
||||
/**
|
||||
* Construct a CompactByteArray from a pre-computed index and values array. The values
|
||||
* will be aliased by the CompactByteArray. Memory is allocated with uprv_malloc.
|
||||
* Note: for speed, the compact method will only re-use blocks in the values array
|
||||
* that are on a block boundary. The pre-computed arrays passed in to this constructor
|
||||
* may re-use blocks at any position in the values array.
|
||||
*
|
||||
* @param indexArray the index array to be adopted
|
||||
* @param newValues the value array to be adopted
|
||||
* @param count the number of entries in the value array
|
||||
* @return the newly constructed CompactByteArray
|
||||
* @see compact
|
||||
*/
|
||||
U_CAPI CompactByteArray* U_EXPORT2 ucmp8_openAlias(uint16_t* indexArray,
|
||||
int8_t* newValues,
|
||||
int32_t count);
|
||||
|
||||
|
||||
/**
|
||||
* Initialize an empty CompactByteArray. Do not call this function
|
||||
* if you created the array with ucmp8_open() because it will cause a memory
|
||||
* leak.
|
||||
*
|
||||
* @param defaultValue the default value for all characters not explicitly in the array
|
||||
* @param array An uninitialized CompactByteArray
|
||||
* @see ucmp8_open
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 ucmp8_init(CompactByteArray* array, int8_t defaultValue);
|
||||
|
||||
/**
|
||||
* Initialize an empty CompactByteArray to the bogus value. Do not call this
|
||||
* function if you created the array with ucmp8_open() because it will cause
|
||||
* a memory leak.
|
||||
*
|
||||
* @param array An uninitialized CompactByteArray
|
||||
* @see ucmp8_open
|
||||
* @see ucmp8_isBogus
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 ucmp8_initBogus(CompactByteArray* array);
|
||||
|
||||
/**
|
||||
* Initialize a CompactByteArray from a pre-computed index and values array. The values
|
||||
* will be adopted by the CompactByteArray. Memory is allocated with uprv_malloc.
|
||||
* Note: for speed, the compact method will only re-use blocks in the values array
|
||||
* that are on a block boundary. The pre-computed arrays passed in to this constructor
|
||||
* may re-use blocks at any position in the values array. The indexArray and newValues
|
||||
* will be uprv_free'd when ucmp8_close() is called.
|
||||
*
|
||||
* @param this_obj An uninitialized CompactByteArray
|
||||
* @param indexArray the index array to be adopted
|
||||
* @param newValues the value array to be adopted
|
||||
* @param count the number of entries in the value array
|
||||
* @return the pointer refers to the CompactByteArray
|
||||
* @see compact
|
||||
*/
|
||||
U_CAPI CompactByteArray* U_EXPORT2 ucmp8_initAdopt(CompactByteArray *this_obj,
|
||||
uint16_t* indexArray,
|
||||
int8_t* newValues,
|
||||
int32_t count);
|
||||
|
||||
/**
|
||||
* Initialize a CompactByteArray from a pre-computed index and values array. The values
|
||||
* will be aliased by the CompactByteArray. Memory is allocated with uprv_malloc.
|
||||
* Note: for speed, the compact method will only re-use blocks in the values array
|
||||
* that are on a block boundary. The pre-computed arrays passed in to this constructor
|
||||
* may re-use blocks at any position in the values array.
|
||||
*
|
||||
* @param this_obj An uninitialized CompactByteArray
|
||||
* @param indexArray the index array to be adopted
|
||||
* @param newValues the value array to be adopted
|
||||
* @param count the number of entries in the value array
|
||||
* @return the pointer refers to the CompactByteArray
|
||||
* @see compact
|
||||
*/
|
||||
U_CAPI CompactByteArray* U_EXPORT2 ucmp8_initAlias(CompactByteArray *this_obj,
|
||||
uint16_t* indexArray,
|
||||
int8_t* newValues,
|
||||
int32_t count);
|
||||
|
||||
/**
|
||||
* Free up any allocated memory associated with this compact array.
|
||||
* The memory that is uprv_free'd depends on how the array was initialized
|
||||
* or opened.
|
||||
*
|
||||
* @param array The compact array to close
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 ucmp8_close(CompactByteArray* array);
|
||||
|
||||
/**
|
||||
* Returns TRUE if the creation of the compact array fails.
|
||||
* @param array The CompactByteArray to be created.
|
||||
* @return TRUE if the creation of the compact array fails.
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2 ucmp8_isBogus(const CompactByteArray* array);
|
||||
|
||||
/**
|
||||
* Get the mapped value of a Unicode character.
|
||||
*
|
||||
* @param index the character to get the mapped value with
|
||||
* @return the mapped value of the given character
|
||||
*/
|
||||
#define ucmp8_get(array, index) (array->fArray[(array->fIndex[index >> UCMP8_kBlockShift] & 0xFFFF) + (index & UCMP8_kBlockMask)])
|
||||
|
||||
#define ucmp8_getu(array,index) (uint8_t)ucmp8_get(array,index)
|
||||
|
||||
|
||||
/**
|
||||
* Set a new value for a Unicode character.
|
||||
* Set automatically expands the array if it is compacted.
|
||||
*
|
||||
* @param array the CompactByteArray to be set
|
||||
* @param character the character to set the mapped value with
|
||||
* @param value the new mapped value
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 ucmp8_set(CompactByteArray* array,
|
||||
UChar character,
|
||||
int8_t value);
|
||||
|
||||
/**
|
||||
* Set new values for a range of Unicode character.
|
||||
*
|
||||
* @param array the CompactByteArray to be set
|
||||
* @param start the starting offset of the range
|
||||
* @param end the ending offset of the range
|
||||
* @param value the new mapped value
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 ucmp8_setRange(CompactByteArray* array,
|
||||
UChar start,
|
||||
UChar end,
|
||||
int8_t value);
|
||||
|
||||
U_CAPI int32_t U_EXPORT2 ucmp8_getCount(const CompactByteArray* array);
|
||||
U_CAPI const int8_t* U_EXPORT2 ucmp8_getArray(const CompactByteArray* array);
|
||||
U_CAPI const uint16_t* U_EXPORT2 ucmp8_getIndex(const CompactByteArray* array);
|
||||
|
||||
/**
|
||||
* Compact the array.
|
||||
* The value of cycle determines how large the overlap can be.
|
||||
* A cycle of 1 is the most compacted, but takes the most time to do.
|
||||
* If values stored in the array tend to repeat in cycles of, say, 16,
|
||||
* then using that will be faster than cycle = 1, and get almost the
|
||||
* same compression.
|
||||
* @param array The CompactByteArray to be compacted
|
||||
* @param cycle The value determines how large the overlap can be.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 ucmp8_compact(CompactByteArray* array,
|
||||
uint32_t cycle);
|
||||
|
||||
/** Expanded takes the array back to a 65536 element array
|
||||
* @param array The CompactByteArray to be expanded
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 ucmp8_expand(CompactByteArray* array);
|
||||
|
||||
/**
|
||||
* Flatten into a memory structure. Pass in NULL to pre-flight to get the required size.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2 ucmp8_flattenMem(const CompactByteArray* array, uint8_t *MS);
|
||||
|
||||
/* initializes an existing CBA from memory. Will cause ucmp8_close() to not deallocate anything. */
|
||||
U_CAPI void U_EXPORT2 ucmp8_initFromData(CompactByteArray* array, const uint8_t **source, UErrorCode *status);
|
||||
|
||||
#endif
|
||||
|
|
@ -58,10 +58,11 @@ cmsccoll.o cmsgtst.o cposxtst.o cldrtest.o \
|
|||
cnmdptst.o cnormtst.o cnumtst.o crestst.o creststn.o cturtst.o \
|
||||
cucdapi.o cucdtst.o custrtst.o cstrcase.o cutiltst.o nucnvtst.o nccbtst.o bocu1tst.o \
|
||||
cbiditst.o cbididat.o eurocreg.o udatatst.o utf16tst.o utransts.o \
|
||||
ncnvfbts.o ncnvtst.o putiltst.o cstrtest.o utf8tst.o ucmptst.o \
|
||||
ncnvfbts.o ncnvtst.o putiltst.o cstrtest.o utf8tst.o \
|
||||
stdnmtst.o ctstdep.o usrchtst.o custrtrn.o sorttest.o trietest.o usettest.o \
|
||||
uenumtst.o utmstest.o currtest.o \
|
||||
idnatest.o nfsprep.o spreptst.o sprpdata.o hpmufn.o tracetst.o reapits.o utexttst.o ucsdetst.o
|
||||
idnatest.o nfsprep.o spreptst.o sprpdata.o \
|
||||
hpmufn.o tracetst.o reapits.o utexttst.o ucsdetst.o
|
||||
|
||||
DEPS = $(OBJECTS:.o=.d)
|
||||
|
||||
|
|
|
@ -28,7 +28,6 @@ void addUTF16Test(TestNode** root);
|
|||
void addUTF8Test(TestNode** root);
|
||||
void addUTransTest(TestNode** root);
|
||||
void addPUtilTest(TestNode** root);
|
||||
void addCompactArrayTest(TestNode** root);
|
||||
void addTestDeprecatedAPI(TestNode** root);
|
||||
void addUCharTransformTest(TestNode** root);
|
||||
void addUSetTest(TestNode** root);
|
||||
|
@ -51,7 +50,6 @@ void addAllTests(TestNode** root)
|
|||
addConvert(root);
|
||||
addUCharTransformTest(root);
|
||||
addStandardNamesTest(root);
|
||||
addCompactArrayTest(root);
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
addFormatTest(root);
|
||||
#endif
|
||||
|
|
|
@ -269,9 +269,6 @@
|
|||
<File
|
||||
RelativePath=".\trietest.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\ucmptst.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\uenumtst.c">
|
||||
</File>
|
||||
|
|
|
@ -1,201 +0,0 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1998-2004, International Business Machines Corporation
|
||||
* and others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/*
|
||||
* File test.c
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 07/28/2000 Madhu Creation
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "ucmp8.h"
|
||||
#include "cmemory.h"
|
||||
#include "cintltst.h"
|
||||
#include "ucol_imp.h"
|
||||
|
||||
|
||||
static void TestUCMP8API(void);
|
||||
|
||||
void addCompactArrayTest(TestNode** root);
|
||||
|
||||
|
||||
void
|
||||
addCompactArrayTest(TestNode** root)
|
||||
{
|
||||
addTest(root, &TestUCMP8API, "ucmptst/TestUCMP8API");
|
||||
}
|
||||
|
||||
/* Check that the first ten entries of the value array are all 0xFD, the
 * value the caller stored with ucmp8_set()/ucmp8_setRange(). Logs one error
 * and stops at the first mismatch.
 */
static void query(CompactByteArray *array) {
    int32_t i = 0;
    const uint8_t *valuesSet=(uint8_t *)ucmp8_getArray(array);
    for(i =0 ; i< 10; i++ ){
        /* Index with i: the loop used to re-test entry 0 ten times. */
        if(valuesSet[i] != (uint8_t)0xFD ){
            log_err("ERROR: did not get the values expected values\n");
            break;
        }
    }
}
|
||||
|
||||
/*
 * Exercises the ucmp8 API: open/init/initBogus, compact and the implicit
 * expansion done by set/setRange, flatten + initFromData round trip, and the
 * alias/adopt constructors.
 *
 * Failed allocations and failed open calls are reported with log_err and the
 * dependent steps are skipped instead of dereferencing a NULL pointer.
 */
static void TestUCMP8API(){
    UErrorCode status = U_ZERO_ERROR;
    CompactByteArray* ucmp8Array=NULL;
    CompactByteArray* pAliaser=NULL;
    CompactByteArray* pAdopter=NULL;

    CompactByteArray aliaser;
    CompactByteArray adopter;

    CompactByteArray ucmp8Array1;
    CompactByteArray ucmp8Array2;
    CompactByteArray ucmp8Clone;
    int8_t *values;
    uint8_t *valuesSet;
    static const int8_t TEST_DEFAULT_VALUE = (int8_t)0xFF;

    /*ucmp8_open*/
    log_verbose("Testing ucmp8_open()\n");
    ucmp8Array=ucmp8_open(TEST_DEFAULT_VALUE);
    if(ucmp8Array == NULL){
        log_err("ERROR: ucmp8_open() failed\n");
    }
    else if( (int32_t)ucmp8_getCount(ucmp8Array) != (int32_t)ucmp8_getkUnicodeCount()) {
        log_err("ERROR: ucmp8_open failed\n");
    }

    /*ucmp8_init*/
    log_verbose("Testing ucmp8_init()\n");
    ucmp8_init(&ucmp8Array1, TEST_DEFAULT_VALUE);
    if( (int32_t)ucmp8_getCount(&ucmp8Array1) != (int32_t)ucmp8_getkUnicodeCount() ||
        ucmp8_getIndex(&ucmp8Array1) == NULL ||
        ucmp8_getArray(&ucmp8Array1) == NULL ||
        ucmp8Array1.fBogus != FALSE){
        log_err("Error: ucmp8_init() failed\n");
    }
    /*ucmp8_initBogus*/
    log_verbose("Testing ucmp8_initBogus()\n");
    ucmp8_initBogus(&ucmp8Array2);
    if((int32_t)ucmp8_getCount(&ucmp8Array2) != ucmp8Array2.fCount ||
        ucmp8_getIndex(&ucmp8Array2) != NULL ||
        ucmp8_getArray(&ucmp8Array2) != NULL ||
        ucmp8Array2.fBogus != TRUE){
        log_err("Error: ucmp8_initBogus() failed\n");
    }
    /*ucmp8_getkBlockCount*/
    if(ucmp8_getkBlockCount() != 128 ){
        log_err("Error in ucmp8_getkBlockCount()\n");
    }
    values=(int8_t*)ucmp8_getArray(&ucmp8Array1);
    if((uint8_t)values[0] != (uint8_t)TEST_DEFAULT_VALUE){
        log_err("Error: getArray() or init failed\n");
    }

    /*ucmp8_compact*/
    if(ucmp8Array1.fCompact == TRUE){
        log_err("Error: ucmp8_open failed Got compact for expanded data\n");
    }
    ucmp8_compact(&ucmp8Array1, 1);
    if(ucmp8Array1.fCompact != TRUE){
        log_err("Error: ucmp8_compact failed\n");
    }
    /*ucmp8_set*/
    ucmp8_set(&ucmp8Array1, 0, (uint8_t)0xFE);
    valuesSet=(uint8_t*)ucmp8_getArray(&ucmp8Array1);
    if(valuesSet[0] != (uint8_t)0xFE ){
        log_err("ERROR: ucmp8_set() failed\n");
    }
    if(ucmp8Array1.fCompact == TRUE){
        log_err("Error: ucmp8_set didn't expand the compact data \n");
    }

    /*ucmp8_set*/
    ucmp8_compact(&ucmp8Array1, 1);
    ucmp8_set(&ucmp8Array1, 0, (uint8_t)0xFD);
    valuesSet=(uint8_t*)ucmp8_getArray(&ucmp8Array1);
    if(valuesSet[0] != (uint8_t)0xFD ){
        log_err("ERROR: ucmp8_set() failed\n");
    }
    if(ucmp8Array1.fCompact == TRUE){
        log_err("Error: ucmp8_set didn't expand the compact data \n");
    }
    /*ucmp8_setRange*/
    ucmp8_compact(&ucmp8Array1, 1);
    ucmp8_setRange(&ucmp8Array1, 0, 10, (uint8_t)0xFD);
    query(&ucmp8Array1);

    log_verbose("Testing ucmp8_flattenMem()\n");
    {
        int32_t len = 0;
        /* A NULL destination only pre-flights the required size. */
        int32_t size = ucmp8_flattenMem(&ucmp8Array1, NULL);
        uint8_t *buff = malloc(size);
        uint8_t *buffLocation = buff;
        len = ucmp8_flattenMem(&ucmp8Array1, buff);

        if(size != len || size == 0 || len == 0 || buff == NULL) {
            log_err("Unable to flatten!\n");
        } else {
            log_verbose("Testing ucmp8_initFromData()\n");
            ucmp8_initFromData(&ucmp8Clone, (const uint8_t **)&buffLocation, &status);
            if(U_FAILURE(status) || ucmp8_isBogus(&ucmp8Clone) == TRUE || (buffLocation-buff) != len){
                log_err("ERROR: ucmp8_initFromData() failed\n");
                status = U_ZERO_ERROR;
            } else {
                query(&ucmp8Clone);
                ucmp8_close(&ucmp8Clone);
            }
        }
        free(buff);
    }

    /*
    openAdopt, initAdopt, openAlias, initAlias
    */
    log_verbose("Testing aliasers and adopters\n");
    {
        int32_t count = ucmp8_getCount(&ucmp8Array1);
        const uint16_t *tIndex = ucmp8_getIndex(&ucmp8Array1);
        const int8_t *tValues = ucmp8_getArray(&ucmp8Array1);
        uint16_t *index = (uint16_t *)uprv_malloc(UCMP8_kIndexCount*sizeof(uint16_t));

        values = (int8_t *)uprv_malloc(count);

        if(index == NULL || values == NULL) {
            log_err("ERROR: out of memory while testing aliasers and adopters\n");
            if(index != NULL) {
                uprv_free(index);
            }
            if(values != NULL) {
                uprv_free(values);
            }
        } else {
            memcpy(index, tIndex, UCMP8_kIndexCount*sizeof(uint16_t));
            memcpy(values, tValues, count);

            ucmp8_initAlias(&aliaser, index, values, count);
            query(&aliaser);
            ucmp8_close(&aliaser);

            pAliaser = ucmp8_openAlias(index, values, count);
            if(pAliaser == NULL) {
                log_err("ERROR: ucmp8_openAlias() failed\n");
            } else {
                query(pAliaser);
                ucmp8_close(pAliaser);
            }

            /* The adopter takes over index and values; fresh copies are
             * allocated below for the next step.
             */
            ucmp8_initAdopt(&adopter, index, values, count); /* TODO: BAD API. Adopted memory MUST be allocated with uprv_malloc */
            query(&adopter);
            ucmp8_close(&adopter);

            index = (uint16_t *)uprv_malloc(UCMP8_kIndexCount*sizeof(uint16_t));
            values = (int8_t *)uprv_malloc(count);

            if(index == NULL || values == NULL) {
                log_err("ERROR: out of memory while testing ucmp8_openAdopt()\n");
                if(index != NULL) {
                    uprv_free(index);
                }
                if(values != NULL) {
                    uprv_free(values);
                }
            } else {
                memcpy(index, tIndex, UCMP8_kIndexCount*sizeof(uint16_t));
                memcpy(values, tValues, count);

                pAdopter = ucmp8_openAdopt(index, values, count); /* TODO: BAD API */
                if(pAdopter == NULL) {
                    /* NOTE(review): unclear whether a failed openAdopt still
                     * owns index/values, so they are not freed here - confirm.
                     */
                    log_err("ERROR: ucmp8_openAdopt() failed\n");
                } else {
                    query(pAdopter);
                    ucmp8_close(pAdopter);
                }
            }
        }
    }
    ucmp8_close(&ucmp8Array1);
    ucmp8_close(&ucmp8Array2);
    if(ucmp8Array != NULL) {
        ucmp8_close(ucmp8Array);
    }
}
|
||||
|
||||
|
Loading…
Add table
Reference in a new issue