mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-861
Implemented backwards iterator for collation X-SVN-Rev: 3679
This commit is contained in:
parent
2352c3b293
commit
78a57a7680
6 changed files with 704 additions and 167 deletions
|
@ -14,58 +14,53 @@
|
|||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* Date Name Description
|
||||
*
|
||||
* 6/23/97 helena Adding comments to make code more readable.
|
||||
* 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
|
||||
* 12/10/99 aliu Ported Thai collation support from Java.
|
||||
* 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
|
||||
* 6/23/97 helena Adding comments to make code more readable.
|
||||
* 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
|
||||
* 12/10/99 aliu Ported Thai collation support from Java.
|
||||
* 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
|
||||
* 02/19/01 swquek Removed CollationElementsIterator() since it is
|
||||
* private constructor and no calls are made to it
|
||||
*/
|
||||
|
||||
// #include "unicode/sortkey.h"
|
||||
#include "unicode/coleitr.h"
|
||||
#include "ucolimp.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
// #include "unicode/chariter.h"
|
||||
#include "tables.h"
|
||||
// #include "tables.h"
|
||||
// #include "unicode/normlzr.h"
|
||||
// #include "unicode/unicode.h"
|
||||
// #include "tcoldata.h"
|
||||
// #include "ucmp32.h"
|
||||
|
||||
// Constants ------------------------------------------------------------------
|
||||
/* Constants --------------------------------------------------------------- */
|
||||
|
||||
/* synwee : public can't remove */
|
||||
int32_t const CollationElementIterator::NULLORDER = 0xffffffff;
|
||||
int32_t const CollationElementIterator::UNMAPPEDCHARVALUE = 0x7fff0000;
|
||||
// int32_t const CollationElementIterator::UNMAPPEDCHARVALUE = 0x7fff0000;
|
||||
|
||||
// CollationElementIterator public constructor/destructor ---------------------
|
||||
/* CollationElementIterator public constructor/destructor ------------------ */
|
||||
|
||||
CollationElementIterator::CollationElementIterator(
|
||||
const CollationElementIterator& other)
|
||||
: text(0),
|
||||
ownBuffer(new VectorOfInt(2)),
|
||||
reorderBuffer(0),
|
||||
expIndex(other.expIndex)
|
||||
const CollationElementIterator& other)
|
||||
: isDataOwned_(TRUE)
|
||||
{
|
||||
*this = other;
|
||||
}
|
||||
|
||||
CollationElementIterator::~CollationElementIterator()
|
||||
{
|
||||
delete text;
|
||||
text = NULL;
|
||||
bufferAlias = NULL;
|
||||
orderAlias = NULL;
|
||||
delete ownBuffer;
|
||||
delete reorderBuffer;
|
||||
ucol_closeElements(m_data_);
|
||||
}
|
||||
|
||||
// CollationElementIterator public methods ------------------------------------
|
||||
/* CollationElementIterator public methods --------------------------------- */
|
||||
|
||||
UTextOffset CollationElementIterator::getOffset() const
|
||||
{
|
||||
// Since the DecompositionIterator is doing the work of iterating through
|
||||
// the text string, we can just ask it what its offset is.
|
||||
return (text != NULL) ? text->getIndex() : 0;
|
||||
return ucol_getOffset(m_data_);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -75,6 +70,7 @@ UTextOffset CollationElementIterator::getOffset() const
|
|||
*/
|
||||
int32_t CollationElementIterator::next(UErrorCode& status)
|
||||
{
|
||||
/*
|
||||
if (text == NULL || U_FAILURE(status))
|
||||
return NULLORDER;
|
||||
|
||||
|
@ -111,9 +107,8 @@ int32_t CollationElementIterator::next(UErrorCode& status)
|
|||
// Ask the collator for this character's ordering.
|
||||
// Used to be RuleBasedCollator.getUnicodeOrder().
|
||||
// It can't be inlined in tblcoll.h file unfortunately.
|
||||
/*
|
||||
synwee : have to modify this part
|
||||
int32_t value = ucmp32_get(orderAlias->data->mapping, ch);
|
||||
|
||||
int32_t value = ucmp32_get(orderAlias->data->mapping, ch);
|
||||
|
||||
if (value == RuleBasedCollator::UNMAPPED)
|
||||
{
|
||||
|
@ -153,21 +148,22 @@ int32_t CollationElementIterator::next(UErrorCode& status)
|
|||
|
||||
return strengthOrder(value);
|
||||
*/
|
||||
return 0;
|
||||
return ucol_next(m_data_, &status);
|
||||
}
|
||||
|
||||
UBool CollationElementIterator::operator!=(
|
||||
const CollationElementIterator& other) const
|
||||
const CollationElementIterator& other) const
|
||||
{
|
||||
return !(*this == other);
|
||||
}
|
||||
|
||||
UBool CollationElementIterator::operator==(const CollationElementIterator& that)
|
||||
const
|
||||
UBool CollationElementIterator::operator==(
|
||||
const CollationElementIterator& that) const
|
||||
{
|
||||
if (this == &that)
|
||||
return TRUE;
|
||||
|
||||
|
||||
/*
|
||||
if (*text != *(that.text))
|
||||
return FALSE;
|
||||
|
||||
|
@ -182,6 +178,9 @@ UBool CollationElementIterator::operator==(const CollationElementIterator& that)
|
|||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
*/
|
||||
|
||||
return m_data_ == that.m_data_;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -192,6 +191,7 @@ UBool CollationElementIterator::operator==(const CollationElementIterator& that)
|
|||
*/
|
||||
int32_t CollationElementIterator::previous(UErrorCode& status)
|
||||
{
|
||||
/*
|
||||
if (text == NULL || U_FAILURE(status))
|
||||
return NULLORDER;
|
||||
|
||||
|
@ -212,8 +212,7 @@ int32_t CollationElementIterator::previous(UErrorCode& status)
|
|||
|
||||
// Used to be RuleBasedCollator.getUnicodeOrder(). It can't be inlined in
|
||||
// tblcoll.h file unfortunately.
|
||||
/*
|
||||
|
||||
|
||||
int32_t value = ucmp32_get(orderAlias->data->mapping, ch);
|
||||
|
||||
if (value == RuleBasedCollator::UNMAPPED)
|
||||
|
@ -252,7 +251,7 @@ int32_t CollationElementIterator::previous(UErrorCode& status)
|
|||
|
||||
return strengthOrder(value);
|
||||
*/
|
||||
return 0;
|
||||
return ucol_previous(m_data_, &status);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -260,6 +259,7 @@ int32_t CollationElementIterator::previous(UErrorCode& status)
|
|||
*/
|
||||
void CollationElementIterator::reset()
|
||||
{
|
||||
/*
|
||||
if (text != NULL)
|
||||
{
|
||||
text->reset();
|
||||
|
@ -268,11 +268,14 @@ void CollationElementIterator::reset()
|
|||
|
||||
bufferAlias = NULL;
|
||||
expIndex = 0;
|
||||
*/
|
||||
ucol_reset(m_data_);
|
||||
}
|
||||
|
||||
void CollationElementIterator::setOffset(UTextOffset newOffset,
|
||||
UErrorCode& status)
|
||||
{
|
||||
/*
|
||||
if (U_FAILURE(status))
|
||||
return;
|
||||
|
||||
|
@ -280,6 +283,8 @@ void CollationElementIterator::setOffset(UTextOffset newOffset,
|
|||
text->setIndex(newOffset);
|
||||
|
||||
bufferAlias = NULL;
|
||||
*/
|
||||
ucol_setOffset(m_data_, newOffset, &status);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -290,7 +295,7 @@ void CollationElementIterator::setText(const UnicodeString& source,
|
|||
{
|
||||
if (U_FAILURE(status))
|
||||
return;
|
||||
|
||||
/*
|
||||
bufferAlias = 0;
|
||||
|
||||
if (text == NULL)
|
||||
|
@ -300,6 +305,17 @@ void CollationElementIterator::setText(const UnicodeString& source,
|
|||
text->setText(source, status);
|
||||
text->setMode(orderAlias->getDecomposition());
|
||||
}
|
||||
*/
|
||||
int32_t length = source.length();
|
||||
UChar *string = new UChar[length];
|
||||
source.extract(0, length, string);
|
||||
|
||||
m_data_->length_ = length;
|
||||
|
||||
if (m_data_->iteratordata_.isWritable &&
|
||||
m_data_->iteratordata_.string != NULL)
|
||||
uprv_free(m_data_->iteratordata_.string);
|
||||
init_collIterate(string, length, &m_data_->iteratordata_, TRUE);
|
||||
}
|
||||
|
||||
// Sets the source to the new character iterator.
|
||||
|
@ -309,6 +325,7 @@ void CollationElementIterator::setText(CharacterIterator& source,
|
|||
if (U_FAILURE(status))
|
||||
return;
|
||||
|
||||
/*
|
||||
bufferAlias = 0;
|
||||
|
||||
if (text == NULL)
|
||||
|
@ -318,38 +335,52 @@ void CollationElementIterator::setText(CharacterIterator& source,
|
|||
text->setMode(orderAlias->getDecomposition());
|
||||
text->setText(source, status);
|
||||
}
|
||||
*/
|
||||
int32_t length = source.getLength();
|
||||
UChar *buffer = new UChar[length];
|
||||
/*
|
||||
Using this constructor will prevent buffer from being removed when
|
||||
string gets removed
|
||||
*/
|
||||
UnicodeString string(buffer, length, length);
|
||||
source.getText(string);
|
||||
string.extract(0, length, buffer);
|
||||
m_data_->length_ = length;
|
||||
|
||||
if (m_data_->iteratordata_.isWritable &&
|
||||
m_data_->iteratordata_.string != NULL)
|
||||
uprv_free(m_data_->iteratordata_.string);
|
||||
init_collIterate(buffer, length, &m_data_->iteratordata_, TRUE);
|
||||
}
|
||||
|
||||
int32_t CollationElementIterator::strengthOrder(int32_t order) const
|
||||
{
|
||||
Collator::ECollationStrength s = orderAlias->getStrength();
|
||||
UCollationStrength s = ucol_getStrength(m_data_->collator_);
|
||||
// Mask off the unwanted differences.
|
||||
if (s == Collator::PRIMARY)
|
||||
if (s == UCOL_PRIMARY)
|
||||
order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
|
||||
else
|
||||
if (s == Collator::SECONDARY)
|
||||
if (s == UCOL_SECONDARY)
|
||||
order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
|
||||
|
||||
return order;
|
||||
}
|
||||
|
||||
// CollationElementIterator private constructors/destructors ------------------
|
||||
/* CollationElementIterator private constructors/destructors --------------- */
|
||||
|
||||
// This private method will never be called, but it makes the linker happy
|
||||
CollationElementIterator::CollationElementIterator() : text(0), bufferAlias(0),
|
||||
ownBuffer(new VectorOfInt(2)),
|
||||
reorderBuffer(0), expIndex(0),
|
||||
orderAlias(0)
|
||||
/*
|
||||
This private method will never be called, but it makes the linker happy
|
||||
CollationElementIterator::CollationElementIterator() : m_data_(0)
|
||||
{
|
||||
}
|
||||
*/
|
||||
|
||||
CollationElementIterator::CollationElementIterator(
|
||||
const RuleBasedCollator* order)
|
||||
: text(0), bufferAlias(0),
|
||||
ownBuffer(new VectorOfInt(2)),
|
||||
reorderBuffer(0), expIndex(0),
|
||||
orderAlias(order)
|
||||
const RuleBasedCollator* order)
|
||||
: isDataOwned_(TRUE)
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
m_data_ = ucol_openElements(order->ucollator, NULL, 0, &status);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -359,17 +390,12 @@ CollationElementIterator::CollationElementIterator(
|
|||
CollationElementIterator::CollationElementIterator(
|
||||
const UnicodeString& sourceText,
|
||||
const RuleBasedCollator* order,
|
||||
UErrorCode& status)
|
||||
: text(NULL),
|
||||
bufferAlias(NULL),
|
||||
ownBuffer(new VectorOfInt(2)),
|
||||
reorderBuffer(0),
|
||||
expIndex(0),
|
||||
orderAlias(order)
|
||||
UErrorCode& status)
|
||||
{
|
||||
if (U_FAILURE(status))
|
||||
return;
|
||||
|
||||
|
||||
/*
|
||||
if ( sourceText.length() != 0 )
|
||||
{
|
||||
// A CollationElementIterator is really a two-layered beast.
|
||||
|
@ -386,6 +412,8 @@ CollationElementIterator::CollationElementIterator(
|
|||
if (text == NULL)
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
*/
|
||||
m_data_ = ucol_openElements(order->ucollator, NULL, 0, &status);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -393,20 +421,16 @@ CollationElementIterator::CollationElementIterator(
|
|||
* the source text using the specified collator
|
||||
*/
|
||||
CollationElementIterator::CollationElementIterator(
|
||||
const CharacterIterator& sourceText,
|
||||
const RuleBasedCollator* order,
|
||||
UErrorCode& status)
|
||||
: text(NULL),
|
||||
bufferAlias(NULL),
|
||||
ownBuffer(new VectorOfInt(2)),
|
||||
reorderBuffer(0),
|
||||
expIndex(0),
|
||||
orderAlias(order)
|
||||
const CharacterIterator& sourceText,
|
||||
const RuleBasedCollator* order,
|
||||
UErrorCode& status)
|
||||
: isDataOwned_(TRUE)
|
||||
{
|
||||
if (U_FAILURE(status))
|
||||
return;
|
||||
|
||||
// **** should I just drop this test? ****
|
||||
/*
|
||||
if ( sourceText.endIndex() != 0 )
|
||||
{
|
||||
// A CollationElementIterator is really a two-layered beast.
|
||||
|
@ -423,15 +447,29 @@ CollationElementIterator::CollationElementIterator(
|
|||
if (text == NULL)
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
*/
|
||||
int32_t length = sourceText.getLength();
|
||||
UChar *buffer = new UChar[length];
|
||||
/*
|
||||
Using this constructor will prevent buffer from being removed when
|
||||
string gets removed
|
||||
*/
|
||||
UnicodeString string(buffer, length, length);
|
||||
// synwee sourceText.getText(string);
|
||||
string.extract(0, length, buffer);
|
||||
|
||||
m_data_ = ucol_openElements(order->ucollator, NULL, 0, &status);
|
||||
// synwee ucol_setText(m_data_, buffer, length, TRUE, &status);
|
||||
}
|
||||
|
||||
// CollationElementIterator private methods -----------------------------------
|
||||
/* CollationElementIterator private methods -------------------------------- */
|
||||
|
||||
const CollationElementIterator& CollationElementIterator::operator=(
|
||||
const CollationElementIterator& other)
|
||||
const CollationElementIterator& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
/*
|
||||
expIndex = other.expIndex;
|
||||
delete text;
|
||||
text = (Normalizer*)other.text->clone();
|
||||
|
@ -455,6 +493,8 @@ const CollationElementIterator& CollationElementIterator::operator=(
|
|||
bufferAlias = other.bufferAlias;
|
||||
|
||||
orderAlias = other.orderAlias;
|
||||
*/
|
||||
this->m_data_ = other.m_data_;
|
||||
}
|
||||
|
||||
return *this;
|
||||
|
|
|
@ -3,6 +3,9 @@
|
|||
* Copyright (C) 1996-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* Modification history
|
||||
* Date Name Comments
|
||||
* 02/16/2001 synwee Added internal method getPrevSpecialCE
|
||||
*/
|
||||
|
||||
#include "ucolimp.h"
|
||||
|
@ -1089,6 +1092,140 @@ uint32_t ucol_getNextUCA(UChar ch, collIterate *collationSource, UErrorCode *sta
|
|||
return order; /* return the CE */
|
||||
}
|
||||
|
||||
/*
|
||||
* This function tries to get a CE from UCA, which should be always around
|
||||
* UChar is passed in in order to speed things up here is also the generation
|
||||
* of implicit CEs
|
||||
*/
|
||||
uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
|
||||
uint32_t length, UErrorCode *status)
|
||||
{
|
||||
uint32_t order;
|
||||
if (ch < 0xFF)
|
||||
order = UCA->latinOneMapping[ch];
|
||||
else
|
||||
order = ucmp32_get(UCA->mapping, ch);
|
||||
|
||||
if (order >= UCOL_NOT_FOUND)
|
||||
order = getSpecialPrevCE(UCA, order, collationSource, length, status);
|
||||
|
||||
if (order == UCOL_NOT_FOUND)
|
||||
{
|
||||
/*
|
||||
This is where we have to resort to algorithmical generation.
|
||||
We have to check if ch is possibly a first surrogate - then we need to
|
||||
take the next code unit and make a bigger CE
|
||||
*/
|
||||
UChar nextChar;
|
||||
const int
|
||||
SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7,
|
||||
LCount = 19, VCount = 21, TCount = 28,
|
||||
NCount = VCount * TCount, // 588
|
||||
SCount = LCount * NCount, // 11172
|
||||
LLimit = LBase + LCount, // 1113
|
||||
VLimit = VBase + VCount, // 1176
|
||||
TLimit = TBase + TCount, // 11C3
|
||||
SLimit = SBase + SCount; // D7A4
|
||||
|
||||
/*
|
||||
once we have failed to find a match for codepoint cp, and are in the
|
||||
implicit code.
|
||||
*/
|
||||
|
||||
unsigned int L = ch - SBase;
|
||||
if (L < SCount)
|
||||
{ /* since it is unsigned, catchs zero case too */
|
||||
|
||||
/*
|
||||
divide into pieces.
|
||||
we do it in this order since some compilers can do % and / in one
|
||||
operation
|
||||
*/
|
||||
int T = L % TCount;
|
||||
L /= TCount;
|
||||
int V = L % VCount;
|
||||
L /= VCount;
|
||||
|
||||
/* offset them */
|
||||
L += LBase;
|
||||
V += VBase;
|
||||
T += TBase;
|
||||
|
||||
/*
|
||||
return the first CE, but first put the rest into the expansion buffer
|
||||
*/
|
||||
if (!collationSource->JamoSpecial)
|
||||
{
|
||||
*(collationSource->CEpos ++) = ucmp32_get(UCA->mapping, V);
|
||||
if (T != TBase)
|
||||
*(collationSource->CEpos++) = ucmp32_get(UCA->mapping, T);
|
||||
/* return first one */
|
||||
return ucmp32_get(UCA->mapping, L);
|
||||
} else {
|
||||
/*
|
||||
Jamo is Special
|
||||
do recursive processing of L, V, and T with fetchCE (but T only if not
|
||||
equal to TBase!!)
|
||||
Since fetchCE returns a CE, and (potentially) stuffs items into the ce
|
||||
buffer,
|
||||
this is how it is done.
|
||||
*/
|
||||
/*
|
||||
int firstCE = fetchCE(L, ...);
|
||||
// set pointer, leave gap!
|
||||
int* lastExpansion = expansionBufferEnd++;
|
||||
*lastExpansion = fetchCE(V,...);
|
||||
if (T != TBase) {
|
||||
lastExpansion = expansionBufferEnd++; // set pointer, leave gap!
|
||||
*lastExpansion = fetchCE(T,...);
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
if (UTF_IS_SECOND_SURROGATE(ch))
|
||||
{
|
||||
if ((collationSource->len - collationSource->pos != length) &&
|
||||
(UTF_IS_FIRST_SURROGATE(nextChar = *collationSource->pos)))
|
||||
{
|
||||
uint32_t cp = ((ch << 10UL) + nextChar - ((0xd800 << 10UL) + 0xdc00));
|
||||
if (collationSource->pos != collationSource->writableBuffer)
|
||||
collationSource->pos --;
|
||||
else
|
||||
{
|
||||
collationSource->pos = collationSource->string +
|
||||
(length - (collationSource->len - collationSource->writableBuffer));
|
||||
collationSource->len = collationSource->string + length;
|
||||
collationSource->isThai = TRUE;
|
||||
}
|
||||
if ((cp & 0xFFFE) == 0xFFFE || (0xD800 <= cp && cp <= 0xDC00))
|
||||
return 0; /* illegal code value, use completely ignoreable! */
|
||||
|
||||
/*
|
||||
This is a code point minus 0x10000, that's what algorithm requires
|
||||
*/
|
||||
order = 0xE0010303 | (cp & 0xFFE00) << 8;
|
||||
*(collationSource->CEpos ++) = 0x80200080 | (cp & 0x001FF) << 22;
|
||||
collationSource->toReturn ++;
|
||||
}
|
||||
else
|
||||
return 0; /* completely ignorable */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* otherwise */
|
||||
if (UTF_IS_FIRST_SURROGATE(ch) || (ch & 0xFFFE) == 0xFFFE)
|
||||
return 0; /* completely ignorable */
|
||||
|
||||
/* Make up an artifical CE from code point as per UCA */
|
||||
order = 0xD08003C3 | (ch & 0xF000) << 12 | (ch & 0x0FE0) << 11;
|
||||
*(collationSource->CEpos ++) = 0x04000080 | (ch & 0x001F) << 27;
|
||||
collationSource->toReturn ++;
|
||||
}
|
||||
}
|
||||
return order; /* return the CE */
|
||||
}
|
||||
|
||||
/* This function handles the special CEs like contractions, expansions, surrogates, Thai */
|
||||
/* It is called by both getNextCE and getNextUCA */
|
||||
uint32_t getSpecialCE(const UCollator *coll, uint32_t CE, collIterate *source, UErrorCode *status) {
|
||||
|
@ -1201,6 +1338,175 @@ uint32_t getSpecialCE(const UCollator *coll, uint32_t CE, collIterate *source, U
|
|||
return CE;
|
||||
}
|
||||
|
||||
/**
|
||||
* This function handles the special CEs like contractions, expansions,
|
||||
* surrogates, Thai.
|
||||
* It is called by both getPrevCE and getPrevUCA
|
||||
* synwee
|
||||
*/
|
||||
uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
|
||||
collIterate *source, uint32_t length,
|
||||
UErrorCode *status)
|
||||
{
|
||||
uint32_t count = 0;
|
||||
const uint32_t *CEOffset = NULL;
|
||||
const UChar *UCharOffset = NULL;
|
||||
UChar schar,
|
||||
tchar;
|
||||
const UChar *strend = NULL;
|
||||
const UChar *constart = NULL;
|
||||
uint32_t size;
|
||||
while (TRUE)
|
||||
{
|
||||
switch (getCETag(CE))
|
||||
{
|
||||
case NOT_FOUND_TAG:
|
||||
return CE;
|
||||
case SURROGATE_TAG:
|
||||
/* pending surrogate discussion with Markus and Mark */
|
||||
return UCOL_NOT_FOUND;
|
||||
case THAI_TAG:
|
||||
if (source->isThai == TRUE)
|
||||
{ /* if we encountered Thai prevowel & the string is not yet touched */
|
||||
source->isThai = FALSE;
|
||||
/*
|
||||
sigh... to cater for getNextCE, we'll have to modify and store the
|
||||
whole string instead of a substring as in getSpecialCE
|
||||
*/
|
||||
UCharOffset = source->pos;
|
||||
strend = source->len;
|
||||
size = strend - source->string;
|
||||
if (size > UCOL_WRITABLE_BUFFER_SIZE)
|
||||
{
|
||||
/*
|
||||
someone else has already allocated something
|
||||
*/
|
||||
if (source->writableBuffer != source->stackWritableBuffer)
|
||||
uprv_free(source->writableBuffer);
|
||||
source->writableBuffer =
|
||||
(UChar *)uprv_malloc(size * sizeof(UChar));
|
||||
source->isThai = FALSE;
|
||||
}
|
||||
UChar *sourceCopy = source->string;
|
||||
UChar *targetCopy = source->writableBuffer;
|
||||
while (sourceCopy < strend)
|
||||
{
|
||||
if (UCOL_ISTHAIPREVOWEL(*sourceCopy) &&
|
||||
/* This is the combination that needs to be swapped */
|
||||
UCOL_ISTHAIBASECONSONANT(*(sourceCopy + 1)))
|
||||
{
|
||||
*(targetCopy) = *(sourceCopy + count + 1);
|
||||
*(targetCopy+1) = *(sourceCopy + count);
|
||||
targetCopy+=2;
|
||||
sourceCopy+=2;
|
||||
}
|
||||
else
|
||||
*(targetCopy++) = *(sourceCopy++);
|
||||
}
|
||||
source->pos = source->writableBuffer +
|
||||
(UCharOffset - source->string);
|
||||
source->len = targetCopy;
|
||||
source->CEpos = source->toReturn = source->CEs;
|
||||
CE = UCOL_IGNORABLE;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
we have already played with the string, so treat Thai as a length one
|
||||
expansion
|
||||
*/
|
||||
/* find the offset to expansion table */
|
||||
CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
|
||||
CE = *CEOffset ++;
|
||||
}
|
||||
break;
|
||||
case CONTRACTION_TAG:
|
||||
/* This should handle contractions */
|
||||
while (TRUE)
|
||||
{
|
||||
/*
|
||||
First we position ourselves at the begining of contraction sequence
|
||||
*/
|
||||
constart = UCharOffset = (UChar *)coll->image + getContractOffset(CE);
|
||||
strend = source->len;
|
||||
|
||||
if (strend - source->pos == length)
|
||||
{ /* this is the start of string */
|
||||
CE = *(coll->contractionCEs +
|
||||
(UCharOffset - coll->contractionIndex));
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
Progressing to backwards block
|
||||
*/
|
||||
UCharOffset += *UCharOffset;
|
||||
|
||||
schar = *source->pos;
|
||||
while (schar > (tchar = *UCharOffset))
|
||||
UCharOffset ++;
|
||||
|
||||
if (schar != tchar)
|
||||
{
|
||||
/*
|
||||
we didn't find the correct codepoint. We can use either the first or
|
||||
the last CE
|
||||
*/
|
||||
if (tchar != 0xFFFF)
|
||||
UCharOffset = constart;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Move up one character */
|
||||
if (source->pos != source->writableBuffer)
|
||||
source->pos --;
|
||||
else
|
||||
{
|
||||
source->pos = source->string +
|
||||
(length - (source->len - source->writableBuffer));
|
||||
source->len = source->string + length;
|
||||
source->isThai = TRUE;
|
||||
}
|
||||
}
|
||||
CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
|
||||
if (!isContraction(CE))
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case EXPANSION_TAG:
|
||||
/*
|
||||
This should handle expansion.
|
||||
NOTE: we can encounter both continuations and expansions in an expansion!
|
||||
I have to decide where continuations are going to be dealt with
|
||||
*/
|
||||
/* find the offset to expansion table */
|
||||
CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
|
||||
size = getExpansionCount(CE);
|
||||
if (size != 0)
|
||||
/*
|
||||
if there are less than 16 elements in expansion, we don't terminate
|
||||
*/
|
||||
for (count = 0; count < size; count++)
|
||||
*(source->CEpos ++) = *CEOffset++;
|
||||
else
|
||||
/* else, we do */
|
||||
while (*CEOffset != 0)
|
||||
*(source->CEpos ++) = *CEOffset ++;
|
||||
source->toReturn = source->CEpos - 1;
|
||||
return *(source->toReturn --);
|
||||
case CHARSET_TAG:
|
||||
/* probably after 1.8 */
|
||||
return UCOL_NOT_FOUND;
|
||||
default:
|
||||
*status = U_INTERNAL_PROGRAM_ERROR;
|
||||
CE=0;
|
||||
break;
|
||||
}
|
||||
if (CE <= UCOL_NOT_FOUND) break;
|
||||
}
|
||||
return CE;
|
||||
}
|
||||
|
||||
/* This should really be a macro */
|
||||
/* However, it is used only when stack buffers are not sufficiently big, and then we're messed up performance wise */
|
||||
/* anyway */
|
||||
|
|
|
@ -1,18 +1,36 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
******************************************************************************
|
||||
* Copyright (C) 2001, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
******************************************************************************
|
||||
*
|
||||
* File ucoleitr.cpp
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 02/15/2001 synwee Modified all methods to process its own function
|
||||
* instead of calling the equivalent c++ api (coleitr.h)
|
||||
******************************************************************************/
|
||||
|
||||
#include "unicode/ucoleitr.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/coleitr.h"
|
||||
#include "unicode/sortkey.h"
|
||||
#include "ucolimp.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
#define BUFFER_LENGTH 100
|
||||
|
||||
typedef struct collIterate collIterator;
|
||||
|
||||
/* public methods ---------------------------------------------------- */
|
||||
|
||||
/**
|
||||
* Since this is going to be deprecated, I'll leave it as it is
|
||||
*/
|
||||
U_CAPI int32_t
|
||||
ucol_keyHashCode( const uint8_t* key,
|
||||
int32_t length)
|
||||
ucol_keyHashCode(const uint8_t *key,
|
||||
int32_t length)
|
||||
{
|
||||
CollationKey newKey(key, length);
|
||||
return newKey.hashCode();
|
||||
|
@ -20,88 +38,160 @@ ucol_keyHashCode( const uint8_t* key,
|
|||
|
||||
|
||||
UCollationElements*
|
||||
ucol_openElements( const UCollator *coll,
|
||||
const UChar *text,
|
||||
int32_t textLength,
|
||||
UErrorCode *status)
|
||||
ucol_openElements(const UCollator *coll,
|
||||
const UChar *text,
|
||||
int32_t textLength,
|
||||
UErrorCode *status)
|
||||
{
|
||||
int32_t len = (textLength == -1 ? u_strlen(text) : textLength);
|
||||
const UnicodeString src((UChar*)text, len, len);
|
||||
UCollationElements *result;
|
||||
|
||||
CollationElementIterator *iter = 0;
|
||||
iter = ((RuleBasedCollator*)coll)->createCollationElementIterator(src);
|
||||
if(iter == 0) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return 0;
|
||||
}
|
||||
if (U_FAILURE(*status))
|
||||
return NULL;
|
||||
|
||||
return (UCollationElements*) iter;
|
||||
result = (UCollationElements *)uprv_malloc(sizeof(UCollationElements));
|
||||
|
||||
result->collator_ = coll;
|
||||
|
||||
/* gets the correct length of the null-terminated string */
|
||||
if (textLength == -1)
|
||||
textLength = u_strlen(text);
|
||||
|
||||
result->length_ = textLength;
|
||||
init_collIterate(text, textLength, &result->iteratordata_, FALSE);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
U_CAPI void
|
||||
ucol_closeElements(UCollationElements *elems)
|
||||
{
|
||||
delete (CollationElementIterator*)elems;
|
||||
collIterate *ci = &elems->iteratordata_;
|
||||
if (ci->writableBuffer != ci->stackWritableBuffer)
|
||||
uprv_free(ci->writableBuffer);
|
||||
if (elems->iteratordata_.isWritable && elems->iteratordata_.string != NULL)
|
||||
uprv_free(elems->iteratordata_.string);
|
||||
uprv_free(elems);
|
||||
}
|
||||
|
||||
U_CAPI void
|
||||
ucol_reset(UCollationElements *elems)
|
||||
{
|
||||
((CollationElementIterator*)elems)->reset();
|
||||
collIterate *ci = &(elems->iteratordata_);
|
||||
ci->pos = ci->string;
|
||||
ci->len = ci->string + elems->length_;
|
||||
ci->CEpos = ci->toReturn = ci->CEs;
|
||||
/*
|
||||
problem here, that means we'll have to keep calculating the new thai set
|
||||
whenever we reset. maybe getSpecialCE should just do up the whole string
|
||||
instead of only a substring of it.
|
||||
*/
|
||||
ci->isThai = TRUE;
|
||||
if (ci->stackWritableBuffer != ci->writableBuffer)
|
||||
{
|
||||
uprv_free(ci->writableBuffer);
|
||||
ci->writableBuffer = ci->stackWritableBuffer;
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI int32_t
|
||||
ucol_next( UCollationElements *elems,
|
||||
UErrorCode *status)
|
||||
ucol_next(UCollationElements *elems,
|
||||
UErrorCode *status)
|
||||
{
|
||||
if(U_FAILURE(*status)) return UCOL_NULLORDER;
|
||||
if (U_FAILURE(*status))
|
||||
return UCOL_NULLORDER;
|
||||
|
||||
return ((CollationElementIterator*)elems)->next(*status);
|
||||
int32_t result;
|
||||
UCOL_GETNEXTCE(result, elems->collator_, elems->iteratordata_, status);
|
||||
return result;
|
||||
}
|
||||
|
||||
U_CAPI int32_t
|
||||
ucol_previous( UCollationElements *elems,
|
||||
UErrorCode *status)
|
||||
ucol_previous(UCollationElements *elems,
|
||||
UErrorCode *status)
|
||||
{
|
||||
if(U_FAILURE(*status)) return UCOL_NULLORDER;
|
||||
if(U_FAILURE(*status))
|
||||
return UCOL_NULLORDER;
|
||||
|
||||
return ((CollationElementIterator*)elems)->previous(*status);
|
||||
int32_t result;
|
||||
UCOL_GETPREVCE(result, elems->collator_, elems->iteratordata_,
|
||||
elems->length_, status);
|
||||
return result;
|
||||
}
|
||||
|
||||
U_CAPI int32_t
|
||||
ucol_getMaxExpansion( const UCollationElements *elems,
|
||||
int32_t order)
|
||||
ucol_getMaxExpansion(const UCollationElements *elems,
|
||||
int32_t order)
|
||||
{
|
||||
return ((CollationElementIterator*)elems)->getMaxExpansion(order);
|
||||
/*
|
||||
synwee : requested this implementation from vladimir, need discussion. so
|
||||
hang on.
|
||||
*/
|
||||
/* return ((CollationElementIterator*)elems)->getMaxExpansion(order); */
|
||||
return -1;
|
||||
}
|
||||
|
||||
U_CAPI void
|
||||
ucol_setText(UCollationElements *elems,
|
||||
const UChar *text,
|
||||
int32_t textLength,
|
||||
UErrorCode *status)
|
||||
ucol_setText( UCollationElements *elems,
|
||||
const UChar *text,
|
||||
int32_t textLength,
|
||||
UErrorCode *status)
|
||||
{
|
||||
if(U_FAILURE(*status)) return;
|
||||
if (U_FAILURE(*status))
|
||||
return;
|
||||
|
||||
/* gets the correct length of the null-terminated string */
|
||||
if (textLength == -1)
|
||||
textLength = u_strlen(text);
|
||||
|
||||
int32_t len = (textLength == -1 ? u_strlen(text) : textLength);
|
||||
const UnicodeString src((UChar*)text, len, len);
|
||||
elems->length_ = textLength;
|
||||
|
||||
((CollationElementIterator*)elems)->setText(src, *status);
|
||||
if (elems->iteratordata_.isWritable && elems->iteratordata_.string != NULL)
|
||||
uprv_free(elems->iteratordata_.string);
|
||||
init_collIterate(text, textLength, &elems->iteratordata_, FALSE);
|
||||
}
|
||||
|
||||
U_CAPI UTextOffset
|
||||
ucol_getOffset(const UCollationElements *elems)
|
||||
{
|
||||
return ((CollationElementIterator*)elems)->getOffset();
|
||||
/* return ((CollationElementIterator*)elems)->getOffset(); */
|
||||
const collIterate *ci = &(elems->iteratordata_);
|
||||
if (ci->isThai == TRUE)
|
||||
return ci->pos - ci->string;
|
||||
|
||||
/*
|
||||
if it is a thai string with reversed elements, since getNextCE does not
|
||||
store only a substring in writeablebuffer, we'll have to do some calculation
|
||||
to get the offset out.
|
||||
need discussion to see if it is a better idea to store the whole string
|
||||
instead.
|
||||
*/
|
||||
return elems->length_ - (ci->len - ci->pos);
|
||||
}
|
||||
|
||||
U_CAPI void
|
||||
ucol_setOffset( UCollationElements *elems,
|
||||
UTextOffset offset,
|
||||
UErrorCode *status)
|
||||
ucol_setOffset(UCollationElements *elems,
|
||||
UTextOffset offset,
|
||||
UErrorCode *status)
|
||||
{
|
||||
if(U_FAILURE(*status)) return;
|
||||
|
||||
((CollationElementIterator*)elems)->setOffset(offset, *status);
|
||||
if (U_FAILURE(*status))
|
||||
return;
|
||||
|
||||
collIterate *ci = &(elems->iteratordata_);
|
||||
ci->pos = ci->string + offset;
|
||||
ci->CEpos = ci->toReturn = ci->CEs;
|
||||
/*
|
||||
problem here, that means we'll have to keep calculating the new thai set
|
||||
whenever we reset. maybe getSpecialCE should just do up the whole string
|
||||
instead of only a substring of it.
|
||||
*/
|
||||
ci->isThai = TRUE;
|
||||
if (ci->stackWritableBuffer != ci->writableBuffer)
|
||||
{
|
||||
uprv_free(ci->writableBuffer);
|
||||
ci->writableBuffer = ci->stackWritableBuffer;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1998-2000, International Business Machines
|
||||
* Copyright (C) 1998-2001, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -14,6 +14,11 @@
|
|||
*
|
||||
* created on: 2000dec11
|
||||
* created by: Vladimir Weinstein
|
||||
*
|
||||
* Modification history
|
||||
* Date Name Comments
|
||||
* 02/16/2001 synwee Added UCOL_GETPREVCE for the use in ucoleitr
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef UCOL_IMP_H
|
||||
|
@ -62,6 +67,28 @@ struct collIterate {
|
|||
UChar *writableBuffer;
|
||||
};
|
||||
|
||||
struct UCollationElements
|
||||
{
|
||||
/**
|
||||
* Locale specific collator for generating the collation elements
|
||||
*/
|
||||
const UCollator *collator_;
|
||||
/**
|
||||
* Normalization mode, not exactly the same as the data in collator_.
|
||||
* If collation strength requested is UCOL_IDENTICAL, this modes will be
|
||||
* UNORM_NONE other it follows collator_.
|
||||
*/
|
||||
UNormalizationMode normalization_;
|
||||
/**
|
||||
* Struct wrapper for source data
|
||||
*/
|
||||
collIterate iteratordata_;
|
||||
/**
|
||||
* Source text length
|
||||
*/
|
||||
int32_t length_;
|
||||
};
|
||||
|
||||
struct incrementalContext {
|
||||
UCharForwardIterator *source;
|
||||
void *sourceContext;
|
||||
|
@ -196,9 +223,61 @@ struct incrementalContext {
|
|||
} \
|
||||
}
|
||||
|
||||
/**
 * Macro that gets the previous collation element while iterating backwards.
 *
 * - If the expansion buffer is not empty (data.CEpos is past data.CEs), the
 *   collation element at data.toReturn is returned and toReturn is
 *   decremented. Once toReturn is back at data.CEs, both buffer pointers are
 *   reset to data.CEs.
 * - Otherwise, if data.len - data.pos equals length, order is set to
 *   UCOL_NO_MORE_CES.
 * - Otherwise the code unit at data.pos is read and pos is moved back by one.
 *   If pos is at the start of data.writableBuffer, pos and len are instead
 *   moved back into data.string and data.isThai is set to TRUE.
 *   The code unit is looked up in coll->latinOneMapping (values <= 0xFF) or
 *   through ucmp32_get on coll->mapping. Results >= UCOL_NOT_FOUND are passed
 *   to getSpecialPrevCE, and if that still yields UCOL_NOT_FOUND,
 *   ucol_getPrevUCA is used.
 *
 * The expansion is a brace-enclosed block, not an expression. Arguments are
 * evaluated more than once, so pass expressions without side effects. All
 * arguments are parenthesized in the expansion, so data may be any lvalue
 * expression (for example *ptr).
 *
 * @param order  lvalue that receives the collation element
 * @param coll   pointer to the collator supplying latinOneMapping and mapping
 * @param data   collIterate lvalue (not a pointer) holding the iteration state
 * @param length length of the source text
 * @param status UErrorCode pointer handed on to getSpecialPrevCE and
 *               ucol_getPrevUCA
 */
#define UCOL_GETPREVCE(order, coll, data, length, status) {                   \
  if ((data).CEpos > (data).CEs) {                                            \
    (order) = *((data).toReturn --);                                          \
    if ((data).CEs == (data).toReturn) {                                      \
      (data).CEpos = (data).toReturn = (data).CEs;                            \
    }                                                                         \
  }                                                                           \
  else {                                                                      \
    if ((data).len - (data).pos == (length)) {                                \
      (order) = UCOL_NO_MORE_CES;                                             \
    }                                                                         \
    else {                                                                    \
      UChar ch = *((data).pos);                                               \
      if ((data).pos != (data).writableBuffer) {                              \
        (data).pos --;                                                        \
      }                                                                       \
      else {                                                                  \
        (data).pos = (data).string +                                          \
                     ((length) - ((data).len - (data).writableBuffer));       \
        (data).len = (data).string + (length);                                \
        (data).isThai = TRUE;                                                 \
      }                                                                       \
      if (ch <= 0xFF) {                                                       \
        (order) = (coll)->latinOneMapping[ch];                                \
      }                                                                       \
      else {                                                                  \
        (order) = ucmp32_get((coll)->mapping, ch);                            \
      }                                                                       \
      if ((order) >= UCOL_NOT_FOUND) {                                        \
        (order) = getSpecialPrevCE((coll), (order), &(data), (length),        \
                                   (status));                                 \
        if ((order) == UCOL_NOT_FOUND) {                                      \
          (order) = ucol_getPrevUCA(ch, &(data), (length), (status));         \
        }                                                                     \
      }                                                                       \
    }                                                                         \
  }                                                                           \
}
|
||||
|
||||
uint32_t getSpecialCE(const UCollator *coll, uint32_t CE, collIterate *source, UErrorCode *status);
|
||||
uint32_t getSpecialPrevCE(const UCollator *coll, uint32_t CE,
|
||||
collIterate *source, uint32_t length,
|
||||
UErrorCode *status);
|
||||
U_CFUNC uint32_t ucol_getNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status);
|
||||
uint32_t ucol_getNextUCA(UChar ch, collIterate *collationSource, UErrorCode *status);
|
||||
uint32_t ucol_getPrevUCA(UChar ch, collIterate *collationSource,
|
||||
uint32_t length, UErrorCode *status);
|
||||
void incctx_cleanUpContext(incrementalContext *ctx);
|
||||
UChar incctx_appendChar(incrementalContext *ctx, UChar c);
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
/*
|
||||
*****************************************************************************************
|
||||
******************************************************************************
|
||||
* Copyright (C) 1997-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*****************************************************************************************
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
|
@ -14,12 +14,14 @@
|
|||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* Date Name Description
|
||||
*
|
||||
* 8/18/97 helena Added internal API documentation.
|
||||
* 08/03/98 erm Synched with 1.2 version CollationElementIterator.java
|
||||
* 12/10/99 aliu Ported Thai collation support from Java.
|
||||
* 01/25/01 swquek Modified into a C++ wrapper calling C APIs (ucoliter.h)
|
||||
* 8/18/97 helena Added internal API documentation.
|
||||
* 08/03/98 erm Synched with 1.2 version CollationElementIterator.java
|
||||
* 12/10/99 aliu Ported Thai collation support from Java.
|
||||
* 01/25/01 swquek Modified into a C++ wrapper calling C APIs (ucoleitr.h)
|
||||
* 02/19/01 swquek Removed CollationElementIterator() since it is a
|
||||
* private constructor and no calls are made to it
|
||||
*/
|
||||
|
||||
#ifndef COLEITR_H
|
||||
|
@ -27,16 +29,22 @@
|
|||
|
||||
// #include "unicode/unistr.h"
|
||||
#include "unicode/tblcoll.h"
|
||||
#include "unicode/ucoleitr.h"
|
||||
|
||||
// #include "tables.h"
|
||||
// #include "unicode/chariter.h"
|
||||
|
||||
// have to do this because the include path in the main project does not have
|
||||
// tables.h.
|
||||
class VectorOfInt;
|
||||
// class VectorOfInt;
|
||||
// class Normalizer;
|
||||
// class VectorOfPToContractElement;
|
||||
// class RuleBasedCollator;
|
||||
|
||||
// typedef void * UCollationElements;
|
||||
// struct UCollationElements;
|
||||
typedef struct UCollationElements UCollationElements;
|
||||
|
||||
/**
|
||||
* The CollationElementIterator class is used as an iterator to walk through
|
||||
* each character of an international string. Use the iterator to return the
|
||||
|
@ -225,6 +233,8 @@ protected:
|
|||
|
||||
// CollationElementIterator protected constructors --------------------------
|
||||
|
||||
// Grants RuleBasedCollator access to the non-public members of this class.
friend class RuleBasedCollator;
|
||||
|
||||
/**
|
||||
* CollationElementIterator constructor. This takes the source string and the
|
||||
* collation object. The cursor will walk thru the source string based on the
|
||||
|
@ -265,15 +275,17 @@ protected:
|
|||
|
||||
private:
|
||||
|
||||
friend class RuleBasedCollator;
|
||||
// friend class RuleBasedCollator;
|
||||
|
||||
// CollationElementIterator private data members ----------------------------
|
||||
|
||||
static const int32_t UNMAPPEDCHARVALUE;
|
||||
// static const int32_t UNMAPPEDCHARVALUE;
|
||||
|
||||
/*
|
||||
Normalizer* text; // owning
|
||||
|
||||
VectorOfInt* bufferAlias; // not owned
|
||||
*/
|
||||
|
||||
/**
|
||||
* ownBuffer wants to be a subobject, not a pointer, but that means exposing
|
||||
|
@ -282,7 +294,7 @@ private:
|
|||
* is used to handle Thai collation; bufferAlias points to ownBuffer in some
|
||||
* situations. [j159 - aliu]
|
||||
*/
|
||||
VectorOfInt* ownBuffer;
|
||||
// VectorOfInt* ownBuffer;
|
||||
|
||||
/**
|
||||
* reorderBuffer is created on demand, so it doesn't want to be a subobject --
|
||||
|
@ -290,18 +302,30 @@ private:
|
|||
* conditions. Once created, it is reused for the life of this object. Because
|
||||
* of the implementation of VectorOfInt, it grows monotonically. [j159 - aliu]
|
||||
*/
|
||||
/*
|
||||
VectorOfInt* reorderBuffer;
|
||||
|
||||
int32_t expIndex;
|
||||
UnicodeString key;
|
||||
const RuleBasedCollator* orderAlias;
|
||||
*/
|
||||
|
||||
/**
|
||||
* Data wrapper for collation elements
|
||||
*/
|
||||
UCollationElements *m_data_;
|
||||
|
||||
/**
|
||||
* Indicates if m_data_ belongs to this object.
|
||||
*/
|
||||
UBool isDataOwned_;
|
||||
|
||||
// CollationElementIterator private constructor/destructor ------------------
|
||||
|
||||
/**
|
||||
* Default constructor.
|
||||
*/
|
||||
CollationElementIterator();
|
||||
/* CollationElementIterator(); */
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
|
@ -377,7 +401,7 @@ inline int32_t CollationElementIterator::tertiaryOrder(int32_t order)
|
|||
|
||||
inline int32_t CollationElementIterator::getMaxExpansion(int32_t order) const
|
||||
{
|
||||
return orderAlias->getMaxExpansion(order);
|
||||
return ucol_getMaxExpansion(m_data_, order);
|
||||
}
|
||||
|
||||
inline UBool CollationElementIterator::isIgnorable(int32_t order)
|
||||
|
|
|
@ -3,22 +3,32 @@
|
|||
* Copyright (C) 2001, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*/
|
||||
*
|
||||
* File ucoleitr.h
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 02/15/2001 synwee Modified all methods to process its own function
|
||||
* instead of calling the equivalent c++ api (coleitr.h)
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef UCOLEITR_H
|
||||
#define UCOLEITR_H
|
||||
|
||||
/** This indicates the last element in a UCollationElements has been consumed.
|
||||
*
|
||||
/**
|
||||
* This indicates the last element in a UCollationElements has been consumed.
|
||||
*/
|
||||
#define UCOL_NULLORDER 0xFFFFFFFF
|
||||
|
||||
#include "unicode/ucol.h"
|
||||
|
||||
/** The UCollationElements struct.
|
||||
* For usage in C programs.
|
||||
/**
|
||||
* The UCollationElements struct.
|
||||
* For usage in C programs.
|
||||
*/
|
||||
typedef void * UCollationElements;
|
||||
// typedef void * UCollationElements;
|
||||
typedef struct UCollationElements UCollationElements;
|
||||
|
||||
/**
|
||||
* The UCollationElements is used as an iterator to walk through
|
||||
|
@ -66,7 +76,7 @@ typedef void * UCollationElements;
|
|||
* a collation order is its primary order; the next 8 bits is the secondary
|
||||
* order and the last 8 bits is the tertiary order.
|
||||
*
|
||||
* @see Collator
|
||||
* @see UCollator
|
||||
*/
|
||||
|
||||
/**
|
||||
|
@ -76,13 +86,13 @@ typedef void * UCollationElements;
|
|||
* @param text The text to iterate over.
|
||||
* @param textLength The number of characters in text, or -1 if null-terminated
|
||||
* @param status A pointer to an UErrorCode to receive any errors.
|
||||
* @stable
|
||||
* @return a struct containing collation element information
|
||||
*/
|
||||
U_CAPI UCollationElements*
|
||||
ucol_openElements( const UCollator *coll,
|
||||
const UChar *text,
|
||||
int32_t textLength,
|
||||
UErrorCode *status);
|
||||
ucol_openElements(const UCollator *coll,
|
||||
const UChar *text,
|
||||
int32_t textLength,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* get a hash code for a key... Not very useful!
|
||||
|
@ -95,7 +105,6 @@ ucol_keyHashCode(const uint8_t* key, int32_t length);
|
|||
* Close a UCollationElements.
|
||||
* Once closed, a UCollationElements may no longer be used.
|
||||
* @param elems The UCollationElements to close.
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI void
|
||||
ucol_closeElements(UCollationElements *elems);
|
||||
|
@ -106,7 +115,6 @@ ucol_closeElements(UCollationElements *elems);
|
|||
* @param elems The UCollationElements to reset.
|
||||
* @see ucol_next
|
||||
* @see ucol_previous
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI void
|
||||
ucol_reset(UCollationElements *elems);
|
||||
|
@ -116,13 +124,11 @@ ucol_reset(UCollationElements *elems);
|
|||
* A single character may contain more than one collation element.
|
||||
* @param elems The UCollationElements containing the text.
|
||||
* @param status A pointer to an UErrorCode to receive any errors.
|
||||
* @return The next collation elements ordering, or \Ref{UCOL_NULLORDER} if the
|
||||
* end of the text is reached.
|
||||
* @stable
|
||||
* @return The next collation elements ordering, or \Ref{UCOL_NULLORDER} if
|
||||
* the end of the text is reached.
|
||||
*/
|
||||
U_CAPI int32_t
|
||||
ucol_next( UCollationElements *elems,
|
||||
UErrorCode *status);
|
||||
ucol_next(UCollationElements *elems, UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Get the ordering priority of the previous collation element in the text.
|
||||
|
@ -131,11 +137,9 @@ ucol_next( UCollationElements *elems,
|
|||
* @param status A pointer to an UErrorCode to receive any errors.
|
||||
* @return The previous collation elements ordering, or \Ref{UCOL_NULLORDER}
|
||||
* if the beginning of the text is reached.
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI int32_t
|
||||
ucol_previous( UCollationElements *elems,
|
||||
UErrorCode *status);
|
||||
ucol_previous(UCollationElements *elems, UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Get the maximum length of any expansion sequences that end with the
|
||||
|
@ -144,28 +148,24 @@ ucol_previous( UCollationElements *elems,
|
|||
* @param elems The UCollationElements containing the text.
|
||||
* @param order A collation order returned by previous or next.
|
||||
* @return The maximum length of any expansion sequences ending with the
|
||||
* specified order.
|
||||
* @stable
|
||||
* specified order.
|
||||
*/
|
||||
U_CAPI int32_t
|
||||
ucol_getMaxExpansion( const UCollationElements *elems,
|
||||
int32_t order);
|
||||
ucol_getMaxExpansion(const UCollationElements *elems, int32_t order);
|
||||
|
||||
/**
|
||||
* Set the text containing the collation elements.
|
||||
* This replaces any text previously set on elems.
|
||||
* @param elems The UCollationElements to set.
|
||||
* @param text The source text containing the collation elements.
|
||||
* @param textLength The length of text, or -1 if null-terminated.
|
||||
* @param status A pointer to an UErrorCode to receive any errors.
|
||||
* @see ucol_getText
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI void
|
||||
ucol_setText( UCollationElements *elems,
|
||||
const UChar *text,
|
||||
int32_t textLength,
|
||||
UErrorCode *status);
|
||||
ucol_setText( UCollationElements *elems,
|
||||
const UChar *text,
|
||||
int32_t textLength,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Get the offset of the current source character.
|
||||
|
@ -174,7 +174,6 @@ ucol_setText( UCollationElements *elems,
|
|||
* @param elems The UCollationElements to query.
|
||||
* @return The offset of the current source character.
|
||||
* @see ucol_setOffset
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI UTextOffset
|
||||
ucol_getOffset(const UCollationElements *elems);
|
||||
|
@ -186,11 +185,10 @@ ucol_getOffset(const UCollationElements *elems);
|
|||
* @param offset The desired character offset.
|
||||
* @param status A pointer to an UErrorCode to receive any errors.
|
||||
* @see ucol_getOffset
|
||||
* @stable
|
||||
*/
|
||||
U_CAPI void
|
||||
ucol_setOffset( UCollationElements *elems,
|
||||
UTextOffset offset,
|
||||
UErrorCode *status);
|
||||
ucol_setOffset(UCollationElements *elems,
|
||||
UTextOffset offset,
|
||||
UErrorCode *status);
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Add table
Reference in a new issue