ICU-4521 Merge from branch, fixes for 64bit alignment & type conversions

X-SVN-Rev: 27670
This commit is contained in:
Peter Edberg 2010-02-25 06:33:29 +00:00
parent 63e3fc6df5
commit 411a93712a
10 changed files with 762 additions and 265 deletions

2
.gitattributes vendored
View file

@ -49,6 +49,8 @@ README text !eol
*.tri2 -text
icu4c/icu4c.css -text
icu4c/source/common/uvectr64.cpp -text
icu4c/source/common/uvectr64.h -text
icu4c/source/data/curr/pool.res -text
icu4c/source/data/in/nfc.nrm -text
icu4c/source/data/in/nfkc.nrm -text

View file

@ -73,7 +73,7 @@ LIBS = $(LIBICUDT) $(DEFAULT_LIBS)
OBJECTS = errorcode.o putil.o umath.o utypes.o uinvchar.o umutex.o ucln_cmn.o uinit.o uobject.o cmemory.o \
udata.o ucmndata.o udatamem.o umapfile.o udataswp.o ucol_swp.o utrace.o \
uhash.o uhash_us.o uenum.o ustrenum.o uvector.o ustack.o uvectr32.o \
uhash.o uhash_us.o uenum.o ustrenum.o uvector.o ustack.o uvectr32.o uvectr64.o \
ucnv.o ucnv_bld.o ucnv_cnv.o ucnv_io.o ucnv_cb.o ucnv_err.o ucnvlat1.o \
ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o \

View file

@ -986,6 +986,14 @@
RelativePath=".\uvectr32.h"
>
</File>
<File
RelativePath=".\uvectr64.cpp"
>
</File>
<File
RelativePath=".\uvectr64.h"
>
</File>
</Filter>
<Filter
Name="configuration"

View file

@ -0,0 +1,188 @@
/*
******************************************************************************
* Copyright (C) 1999-2010, International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************
*/
#include "uvectr64.h"
#include "cmemory.h"
U_NAMESPACE_BEGIN
#define DEFAULT_CAPACITY 8
/*
* Constants for hinting whether a key is an integer
* or a pointer. If a hint bit is zero, then the associated
* token is assumed to be an integer. This is needed for iSeries
*/
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UVector64)
/**
 * Construct an empty vector with room for DEFAULT_CAPACITY elements.
 *
 * @param status Set to U_MEMORY_ALLOCATION_ERROR by _init() if the initial
 *               storage cannot be allocated; capacity then stays at zero.
 */
UVector64::UVector64(UErrorCode &status)
    : count(0), capacity(0), maxCapacity(0), elements(NULL)
{
    _init(DEFAULT_CAPACITY, status);
}
/**
 * Construct an empty vector with a caller-chosen initial capacity.
 *
 * @param initialCapacity Requested capacity, in elements. _init() replaces
 *                        values below 1 with DEFAULT_CAPACITY.
 * @param status          Set to U_MEMORY_ALLOCATION_ERROR by _init() if the
 *                        initial storage cannot be allocated.
 */
UVector64::UVector64(int32_t initialCapacity, UErrorCode &status)
    : count(0), capacity(0), maxCapacity(0), elements(NULL)
{
    _init(initialCapacity, status);
}
/**
 * Allocate the initial element storage. Called by both constructors.
 *
 * @param initialCapacity Requested capacity, in elements. Values below 1,
 *                        and values whose size in bytes cannot be
 *                        represented in a size_t, are replaced by
 *                        DEFAULT_CAPACITY. The result is then limited to
 *                        maxCapacity when a limit is set.
 * @param status          Set to U_MEMORY_ALLOCATION_ERROR if the allocation
 *                        fails; capacity is left unchanged in that case.
 */
void UVector64::_init(int32_t initialCapacity, UErrorCode &status) {
    // Fix bogus initialCapacity values; avoid malloc(0)
    if (initialCapacity < 1) {
        initialCapacity = DEFAULT_CAPACITY;
    }
    // A capacity whose byte count wraps around size_t (possible where size_t
    // is 32 bits wide) would yield a tiny allocation while 'capacity' claims
    // a huge one. The initial capacity is only a hint, so fall back to the
    // default instead; the vector can still grow on demand.
    if ((size_t)initialCapacity > ((size_t)-1) / sizeof(int64_t)) {
        initialCapacity = DEFAULT_CAPACITY;
    }
    if (maxCapacity > 0 && maxCapacity < initialCapacity) {
        initialCapacity = maxCapacity;
    }
    elements = (int64_t *)uprv_malloc(sizeof(int64_t) * initialCapacity);
    if (elements == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
    } else {
        capacity = initialCapacity;
    }
}
/**
 * Release the element storage.
 */
UVector64::~UVector64() {
    int64_t *storage = elements;
    elements = NULL;
    uprv_free(storage);
}
/**
* Assign this object to another (make this a copy of 'other').
*/
void UVector64::assign(const UVector64& other, UErrorCode &ec) {
if (ensureCapacity(other.count, ec)) {
setSize(other.count);
for (int32_t i=0; i<other.count; ++i) {
elements[i] = other.elements[i];
}
}
}
/**
 * Element-wise equality: TRUE when both vectors hold the same number of
 * elements and every corresponding pair of values is equal.
 */
UBool UVector64::operator==(const UVector64& other) {
    if (count != other.count) {
        return FALSE;
    }
    int32_t pos = 0;
    while (pos < count) {
        if (elements[pos] != other.elements[pos]) {
            return FALSE;
        }
        ++pos;
    }
    return TRUE;
}
/**
 * Overwrite the element at 'index' with 'elem'.
 * An index outside 0..count-1 is silently ignored.
 */
void UVector64::setElementAt(int64_t elem, int32_t index) {
    if (index < 0 || index >= count) {
        return;   // index out of range
    }
    elements[index] = elem;
}
/**
 * Insert 'elem' before position 'index', shifting later elements up by one.
 * Requires 0 <= index <= count; any other index is silently ignored.
 * If the vector cannot grow, status carries the error from ensureCapacity()
 * and nothing is inserted.
 */
void UVector64::insertElementAt(int64_t elem, int32_t index, UErrorCode &status) {
    if (index < 0 || index > count) {
        return;   // index out of range
    }
    if (!ensureCapacity(count + 1, status)) {
        return;
    }
    // Open a gap at 'index' by moving the tail up, highest slot first.
    int32_t slot = count;
    while (slot > index) {
        elements[slot] = elements[slot - 1];
        --slot;
    }
    elements[index] = elem;
    count++;
}
void UVector64::removeAllElements(void) {
count = 0;
}
/**
 * Grow the storage so that it can hold at least minimumCapacity elements.
 * Called by ensureCapacity() when the current capacity is insufficient.
 *
 * Capacity is doubled when that is enough, otherwise raised to
 * minimumCapacity, and never beyond maxCapacity when a limit is set.
 *
 * @param minimumCapacity Required capacity, in elements.
 * @param status          Set to U_BUFFER_OVERFLOW_ERROR if minimumCapacity
 *                        exceeds maxCapacity, or U_MEMORY_ALLOCATION_ERROR
 *                        if the storage cannot be sized or reallocated.
 * @return TRUE if capacity >= minimumCapacity on return. On FALSE the
 *         existing contents and capacity are unchanged.
 */
UBool UVector64::expandCapacity(int32_t minimumCapacity, UErrorCode &status) {
    if (capacity >= minimumCapacity) {
        return TRUE;
    }
    if (maxCapacity>0 && minimumCapacity>maxCapacity) {
        status = U_BUFFER_OVERFLOW_ERROR;
        return FALSE;
    }

    // Largest element count that is representable both as an int32_t and,
    // once multiplied by the element size, as a size_t byte count.
    int32_t elementLimit = 0x7FFFFFFF;
    const size_t sizeLimit = ((size_t)-1) / sizeof(int64_t);
    if (sizeLimit < (size_t)elementLimit) {
        elementLimit = (int32_t)sizeLimit;
    }
    if (minimumCapacity > elementLimit) {
        // The request can never be satisfied; report it as an allocation
        // failure rather than letting the byte count wrap around.
        status = U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }

    // Double the capacity, saturating instead of overflowing int32_t.
    int32_t newCap = (capacity > elementLimit / 2) ? elementLimit : capacity * 2;
    if (newCap < minimumCapacity) {
        newCap = minimumCapacity;
    }
    if (maxCapacity > 0 && newCap > maxCapacity) {
        newCap = maxCapacity;
    }
    int64_t* newElems = (int64_t *)uprv_realloc(elements, sizeof(int64_t)*newCap);
    if (newElems == NULL) {
        // We keep the original contents on the memory failure on realloc.
        status = U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }
    elements = newElems;
    capacity = newCap;
    return TRUE;
}
/**
 * Set the upper bound on capacity, in elements. Zero means "no limit";
 * a negative limit asserts in debug builds and is otherwise treated as zero.
 *
 * If the current capacity already exceeds the new limit, the storage is
 * shrunk to the limit and the element count is truncated to fit. Should
 * that shrinking realloc fail, the existing larger buffer is simply kept.
 */
void UVector64::setMaxCapacity(int32_t limit) {
    U_ASSERT(limit >= 0);
    maxCapacity = (limit < 0) ? 0 : limit;

    if (maxCapacity == 0 || capacity <= maxCapacity) {
        // Unlimited, or the current capacity is already within the limit.
        return;
    }

    // The new maximum is below the current capacity: shrink the storage.
    int64_t *shrunk = (int64_t *)uprv_realloc(elements, sizeof(int64_t)*maxCapacity);
    if (shrunk != NULL) {
        elements = shrunk;
        capacity = maxCapacity;
        if (count > capacity) {
            count = capacity;
        }
    }
    // else: realloc to a smaller size failed; keep what we had, it is not
    // treated as an error.
}
/**
 * Change the number of elements in this vector. A smaller newSize simply
 * truncates; a larger newSize grows the vector and fills the new slots
 * with zero. A negative newSize is ignored. If the vector cannot grow,
 * the call returns without changing anything; the error is not reported
 * because this function has no status parameter.
 */
void UVector64::setSize(int32_t newSize) {
    if (newSize < 0) {
        return;
    }
    if (newSize > count) {
        UErrorCode ec = U_ZERO_ERROR;   // local status, discarded on return
        if (!ensureCapacity(newSize, ec)) {
            return;
        }
        int32_t slot = count;
        while (slot < newSize) {
            elements[slot++] = 0;
        }
    }
    count = newSize;
}
U_NAMESPACE_END

View file

@ -0,0 +1,277 @@
/*
**********************************************************************
* Copyright (C) 1999-2010, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
//
// UVector64 is a class implementing a vector of 64 bit integers.
// It is similar to UVector32, but holds int64_t values rather than int32_t.
// Most of the code is unchanged from UVector.
//
#ifndef UVECTOR64_H
#define UVECTOR64_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "uhash.h"
#include "uassert.h"
U_NAMESPACE_BEGIN
/**
* <p>Ultralightweight C++ implementation of an <tt>int64_t</tt> vector
* that has a subset of methods from UVector32
*
* <p>This is a very simple implementation, written to satisfy an
* immediate porting need. As such, it is not completely fleshed out,
* and it aims for simplicity and conformity. Nonetheless, it serves
* its purpose (porting code from java that uses java.util.Vector)
* well, and it could be easily made into a more robust vector class.
*
* <p><b>Design notes</b>
*
* <p>There is index bounds checking, but little is done about it. If
* indices are out of bounds, either nothing happens, or zero is
* returned.
*
* <p>Out-of-memory conditions are reported through the UErrorCode
* argument of the functions that may need to allocate (the
* constructors, assign(), addElement(), insertElementAt(), push(),
* reserveBlock(), ensureCapacity() and expandCapacity()). setSize()
* takes no status argument; if it cannot grow the vector it returns
* without changing it.
*
* <p>Copying is disallowed: the copy constructor and assignment
* operator are declared private. Use assign() to copy contents.
*
* <p><b>To do</b>
*
* <p>Improve the handling of index out of bounds errors.
*
*/
class U_COMMON_API UVector64 : public UObject {
private:
// Number of elements currently held.
int32_t count;
// Number of elements the allocated storage can hold.
int32_t capacity;
int32_t maxCapacity; // Limit beyond which capacity is not permitted to grow.
// Zero means no limit.
// The element storage.
int64_t* elements;
public:
/**
* Construct an empty vector with a default initial capacity.
*/
UVector64(UErrorCode &status);
/**
* Construct an empty vector with the requested initial capacity,
* in elements.
*/
UVector64(int32_t initialCapacity, UErrorCode &status);
virtual ~UVector64();
/**
* Assign this object to another (make this a copy of 'other').
* If the required capacity cannot be obtained, ec is set and this
* vector is left unchanged.
*/
void assign(const UVector64& other, UErrorCode &ec);
/**
* Compare this vector with another. They will be considered
* equal if they are of the same size and all corresponding
* elements have equal values.
*/
UBool operator==(const UVector64& other);
/**
* Equivalent to !operator==()
*/
inline UBool operator!=(const UVector64& other);
//------------------------------------------------------------
// subset of java.util.Vector API
//------------------------------------------------------------
// Append elem; nothing is added if the vector cannot grow.
void addElement(int64_t elem, UErrorCode &status);
// Overwrite the element at index; an out-of-range index is ignored.
void setElementAt(int64_t elem, int32_t index);
// Insert elem before index (0 <= index <= size()); an out-of-range
// index is ignored.
void insertElementAt(int64_t elem, int32_t index, UErrorCode &status);
// Return the element at index, or zero if index is out of range.
int64_t elementAti(int32_t index) const;
//UBool equals(const UVector64 &other) const;
// Return the last element, or zero if the vector is empty.
int64_t lastElementi(void) const;
//int32_t indexOf(int64_t elem, int32_t startIndex = 0) const;
//UBool contains(int64_t elem) const;
//UBool containsAll(const UVector64& other) const;
//UBool removeAll(const UVector64& other);
//UBool retainAll(const UVector64& other);
//void removeElementAt(int32_t index);
// Reset the element count to zero; the storage is kept.
void removeAllElements();
// Return the number of elements currently held.
int32_t size(void) const;
//UBool isEmpty(void) const;
// Inline. Use this one for speedy size check.
inline UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status);
// Out-of-line, handles actual growth. Called by ensureCapacity() when necessary.
UBool expandCapacity(int32_t minimumCapacity, UErrorCode &status);
/**
* Change the size of this vector as follows: If newSize is
* smaller, then truncate the array. If newSize is larger, grow
* the array, filling in new slots with zero. A negative newSize
* is ignored.
*/
void setSize(int32_t newSize);
//------------------------------------------------------------
// New API
//------------------------------------------------------------
//UBool containsNone(const UVector64& other) const;
//void sortedInsert(int64_t elem, UErrorCode& ec);
/**
* Returns a pointer to the internal array holding the vector.
*/
int64_t *getBuffer() const;
/**
* Set the maximum allowed buffer capacity for this vector/stack.
* Default with no limit set is unlimited, go until malloc() fails.
* A Limit of zero means unlimited capacity.
* Units are vector elements (64 bits each), not bytes.
* If the current capacity exceeds the new limit, the buffer is
* shrunk to the limit and the element count is reduced to fit.
*/
void setMaxCapacity(int32_t limit);
/**
* ICU "poor man's RTTI", returns a UClassID for this class.
*/
static UClassID U_EXPORT2 getStaticClassID();
/**
* ICU "poor man's RTTI", returns a UClassID for the actual class.
*/
virtual UClassID getDynamicClassID() const;
private:
// Allocates the initial storage; shared by the constructors.
void _init(int32_t initialCapacity, UErrorCode &status);
// Disallow
UVector64(const UVector64&);
// Disallow
UVector64& operator=(const UVector64&);
// API Functions for Stack operations.
// In the original UVector, these were in a separate derived class, UStack.
// Here in UVector64, they are all together.
public:
//UBool empty(void) const; // TODO: redundant, same as empty() (presumably isEmpty() was meant). Remove it?
//int64_t peeki(void) const;
// Remove and return the last element; returns zero if the vector is empty.
int64_t popi(void);
// Append i and return it. The value is returned even if the append failed;
// check status.
int64_t push(int64_t i, UErrorCode &status);
// Extend the vector by 'size' elements and return a pointer to the first
// of them, or NULL if the vector could not grow. The new elements are not
// initialized by this function.
int64_t *reserveBlock(int32_t size, UErrorCode &status);
// Remove the top 'size' elements and return a pointer positioned 'size'
// elements below the new top, i.e. at the start of the preceding block
// of the same size.
int64_t *popFrame(int32_t size);
};
// UVector64 inlines
// Fast inline capacity check; defers to expandCapacity() only when the
// current storage is too small.
inline UBool UVector64::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) {
    if (capacity < minimumCapacity) {
        return expandCapacity(minimumCapacity, status);
    }
    return TRUE;
}
// Bounds-checked read: yields zero for any index outside 0..count-1.
inline int64_t UVector64::elementAti(int32_t index) const {
    if (index < 0 || index >= count) {
        return 0;
    }
    return elements[index];
}
// Append one element. If the vector cannot grow, status is set by
// ensureCapacity() and nothing is appended.
inline void UVector64::addElement(int64_t elem, UErrorCode &status) {
    if (!ensureCapacity(count + 1, status)) {
        return;
    }
    elements[count] = elem;
    ++count;
}
// Extend the vector by 'size' elements and hand back a pointer to the first
// of them. Returns NULL, leaving the vector unchanged, if it cannot grow.
inline int64_t *UVector64::reserveBlock(int32_t size, UErrorCode &status) {
    if (!ensureCapacity(count+size, status)) {
        return NULL;
    }
    const int32_t blockStart = count;
    count = blockStart + size;
    return elements + blockStart;
}
// Drop the top 'size' elements (the count never goes below zero) and return
// a pointer 'size' elements below the new top of the vector.
inline int64_t *UVector64::popFrame(int32_t size) {
    U_ASSERT(count >= size);
    const int32_t remaining = count - size;
    count = (remaining < 0) ? 0 : remaining;
    return elements+count-size;
}
// Number of elements currently in the vector.
inline int32_t UVector64::size(void) const {
    return this->count;
}
// Value of the last element; zero when the vector is empty, because
// elementAti() rejects the resulting index of -1.
inline int64_t UVector64::lastElementi(void) const {
    const int32_t lastIndex = count - 1;
    return elementAti(lastIndex);
}
// Logical negation of operator==().
inline UBool UVector64::operator!=(const UVector64& other) {
    return !(*this == other);
}
// Direct access to the internal element array.
inline int64_t *UVector64::getBuffer() const {
    return this->elements;
}
// UStack inlines
// Stack push: append i and return it. The value is returned whether or not
// the append succeeded; callers must check status.
inline int64_t UVector64::push(int64_t i, UErrorCode &status) {
    this->addElement(i, status);
    return i;
}
// Stack pop: remove and return the last element, or return zero if the
// vector is empty.
inline int64_t UVector64::popi(void) {
    if (count <= 0) {
        return 0;
    }
    --count;
    return elements[count];
}
U_NAMESPACE_END
#endif

View file

@ -26,6 +26,7 @@
#include "cmemory.h"
#include "cstring.h"
#include "uvectr32.h"
#include "uvectr64.h"
#include "uassert.h"
#include "ucln_in.h"
#include "uinvchar.h"
@ -292,7 +293,7 @@ void RegexCompile::compile(
// present in the saved state: the input string position (int64_t) and
// the position in the compiled pattern.
//
fRXPat->fFrameSize+=3;
fRXPat->fFrameSize+=RESTACKFRAME_HDRCOUNT;
//
// Optimization pass 1: NOPs, back-references, and case-folding
@ -400,7 +401,7 @@ UBool RegexCompile::doParseActions(int32_t action)
// side fails to match and backtracks. Locate the position for the
// save from the location on the top of the parentheses stack.
int32_t savePosition = fParenStack.popi();
int32_t op = fRXPat->fCompiledPat->elementAti(savePosition);
int32_t op = (int32_t)fRXPat->fCompiledPat->elementAti(savePosition);
U_ASSERT(URX_TYPE(op) == URX_NOP); // original contents of reserved location
op = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+1);
fRXPat->fCompiledPat->setElementAt(op, savePosition);
@ -433,10 +434,10 @@ UBool RegexCompile::doParseActions(int32_t action)
// - NOP, which may later be replaced by a save-state if there
// is an '|' alternation within the parens.
//
// Each capture group gets three double-width slots in the save stack frame:
// 0-1: Capture Group start position (in input string being matched.)
// 2-3: Capture Group end position.
// 4-5: Start of Match-in-progress.
// Each capture group gets three slots in the save stack frame:
// 0: Capture Group start position (in input string being matched.)
// 1: Capture Group end position.
// 2: Start of Match-in-progress.
// The first two locations are for a completed capture group, and are
// referred to by back references and the like.
// The third location stores the capture start position when an START_CAPTURE is
@ -444,8 +445,8 @@ UBool RegexCompile::doParseActions(int32_t action)
// END_CAPTURE is encountered.
{
fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
int32_t varsLoc = fRXPat->fFrameSize; // Reserve five slots in match stack frame.
fRXPat->fFrameSize += 6;
int32_t varsLoc = fRXPat->fFrameSize; // Reserve three slots in match stack frame.
fRXPat->fFrameSize += 3;
int32_t cop = URX_BUILD(URX_START_CAPTURE, varsLoc);
fRXPat->fCompiledPat->addElement(cop, *fStatus);
fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
@ -539,10 +540,10 @@ UBool RegexCompile::doParseActions(int32_t action)
// 8. code for parenthesized stuff.
// 9. LA_END
//
// Three data slots are reserved, for saving the stack ptr and the (double-width) input position.
// Two data slots are reserved, for saving the stack ptr and the input position.
{
int32_t dataLoc = fRXPat->fDataSize;
fRXPat->fDataSize += 3;
fRXPat->fDataSize += 2;
int32_t op = URX_BUILD(URX_LA_START, dataLoc);
fRXPat->fCompiledPat->addElement(op, *fStatus);
@ -583,10 +584,9 @@ UBool RegexCompile::doParseActions(int32_t action)
// 6. BACKTRACK // code in block succeeded, so neg. lookahead fails.
// 7. END_LA // Restore match region, in case look-ahead was using
// an alternate (transparent) region.
// Three data slots are reserved, for saving the stack ptr and the (double-width) input position.
{
int32_t dataLoc = fRXPat->fDataSize;
fRXPat->fDataSize += 3;
fRXPat->fDataSize += 2;
int32_t op = URX_BUILD(URX_LA_START, dataLoc);
fRXPat->fCompiledPat->addElement(op, *fStatus);
@ -625,12 +625,12 @@ UBool RegexCompile::doParseActions(int32_t action)
// Allocate a block of matcher data, to contain (when running a match)
// 0: Stack ptr on entry
// 1: Input Index on entry
// 2-3: Start index of match current match attempt.
// 4-5: Original Input String len.
// 2: Start index of match current match attempt.
// 3: Original Input String len.
// Allocate data space
int32_t dataLoc = fRXPat->fDataSize;
fRXPat->fDataSize += 6;
fRXPat->fDataSize += 4;
// Emit URX_LB_START
int32_t op = URX_BUILD(URX_LB_START, dataLoc);
@ -678,12 +678,12 @@ UBool RegexCompile::doParseActions(int32_t action)
// Allocate a block of matcher data, to contain (when running a match)
// 0: Stack ptr on entry
// 1: Input Index on entry
// 2-3: Start index of match current match attempt.
// 4-5: Original Input String len.
// 2: Start index of match current match attempt.
// 3: Original Input String len.
// Allocate data space
int32_t dataLoc = fRXPat->fDataSize;
fRXPat->fDataSize += 6;
fRXPat->fDataSize += 4;
// Emit URX_LB_START
int32_t op = URX_BUILD(URX_LB_START, dataLoc);
@ -765,14 +765,14 @@ UBool RegexCompile::doParseActions(int32_t action)
// Check for simple constructs, which may get special optimized code.
if (topLoc == fRXPat->fCompiledPat->size() - 1) {
int32_t repeatedOp = fRXPat->fCompiledPat->elementAti(topLoc);
int32_t repeatedOp = (int32_t)fRXPat->fCompiledPat->elementAti(topLoc);
if (URX_TYPE(repeatedOp) == URX_SETREF) {
// Emit optimized code for [char set]+
int32_t loopOpI = URX_BUILD(URX_LOOP_SR_I, URX_VAL(repeatedOp));
fRXPat->fCompiledPat->addElement(loopOpI, *fStatus);
frameLoc = fRXPat->fFrameSize;
fRXPat->fFrameSize += 2; // double-width index
fRXPat->fFrameSize++;
int32_t loopOpC = URX_BUILD(URX_LOOP_C, frameLoc);
fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
break;
@ -792,7 +792,7 @@ UBool RegexCompile::doParseActions(int32_t action)
}
fRXPat->fCompiledPat->addElement(loopOpI, *fStatus);
frameLoc = fRXPat->fFrameSize;
fRXPat->fFrameSize += 2; // double-width index
fRXPat->fFrameSize++;
int32_t loopOpC = URX_BUILD(URX_LOOP_C, frameLoc);
fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
break;
@ -809,7 +809,7 @@ UBool RegexCompile::doParseActions(int32_t action)
// Emit the code sequence that can handle it.
insertOp(topLoc);
frameLoc = fRXPat->fFrameSize;
fRXPat->fFrameSize += 2; // double-width index
fRXPat->fFrameSize++;
int32_t op = URX_BUILD(URX_STO_INP_LOC, frameLoc);
fRXPat->fCompiledPat->setElementAt(op, topLoc);
@ -908,14 +908,14 @@ UBool RegexCompile::doParseActions(int32_t action)
// Check for simple *, where the construct being repeated
// compiled to single opcode, and might be optimizable.
if (topLoc == fRXPat->fCompiledPat->size() - 1) {
int32_t repeatedOp = fRXPat->fCompiledPat->elementAti(topLoc);
int32_t repeatedOp = (int32_t)fRXPat->fCompiledPat->elementAti(topLoc);
if (URX_TYPE(repeatedOp) == URX_SETREF) {
// Emit optimized code for a [char set]*
int32_t loopOpI = URX_BUILD(URX_LOOP_SR_I, URX_VAL(repeatedOp));
fRXPat->fCompiledPat->setElementAt(loopOpI, topLoc);
dataLoc = fRXPat->fFrameSize;
fRXPat->fFrameSize += 2; // double-width index
fRXPat->fFrameSize++;
int32_t loopOpC = URX_BUILD(URX_LOOP_C, dataLoc);
fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
break;
@ -935,7 +935,7 @@ UBool RegexCompile::doParseActions(int32_t action)
}
fRXPat->fCompiledPat->setElementAt(loopOpI, topLoc);
dataLoc = fRXPat->fFrameSize;
fRXPat->fFrameSize += 2; // double-width index
fRXPat->fFrameSize++;
int32_t loopOpC = URX_BUILD(URX_LOOP_C, dataLoc);
fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
break;
@ -953,7 +953,7 @@ UBool RegexCompile::doParseActions(int32_t action)
if (minMatchLength(saveStateLoc, fRXPat->fCompiledPat->size()-1) == 0) {
insertOp(saveStateLoc);
dataLoc = fRXPat->fFrameSize;
fRXPat->fFrameSize += 2; // double-width index
fRXPat->fFrameSize++;
int32_t op = URX_BUILD(URX_STO_INP_LOC, dataLoc);
fRXPat->fCompiledPat->setElementAt(op, saveStateLoc+1);
@ -1060,7 +1060,7 @@ UBool RegexCompile::doParseActions(int32_t action)
int32_t op = URX_BUILD(URX_STO_SP, varLoc);
fRXPat->fCompiledPat->setElementAt(op, topLoc);
int32_t loopOp = fRXPat->fCompiledPat->popi();
int32_t loopOp = (int32_t)fRXPat->fCompiledPat->popi();
U_ASSERT(URX_TYPE(loopOp) == URX_CTR_LOOP && URX_VAL(loopOp) == topLoc);
loopOp++; // point LoopOp after the just-inserted STO_SP
fRXPat->fCompiledPat->push(loopOp, *fStatus);
@ -1768,7 +1768,7 @@ void RegexCompile::literalChar(UChar32 c) {
// If the last thing compiled into the pattern was not a literal char,
// force this new literal char to begin a new string, and not append to the previous.
op = fRXPat->fCompiledPat->lastElementi();
op = (int32_t)fRXPat->fCompiledPat->lastElementi();
opType = URX_TYPE(op);
if (!(opType == URX_STRING_LEN || opType == URX_ONECHAR || opType == URX_ONECHAR_I)) {
fixLiterals();
@ -1784,7 +1784,7 @@ void RegexCompile::literalChar(UChar32 c) {
return;
}
op = fRXPat->fCompiledPat->lastElementi();
op = (int32_t)fRXPat->fCompiledPat->lastElementi();
opType = URX_TYPE(op);
U_ASSERT(opType == URX_ONECHAR || opType == URX_ONECHAR_I || opType == URX_STRING_LEN);
@ -1888,7 +1888,7 @@ void RegexCompile::fixLiterals(UBool split) {
// If the last operation from the compiled pattern is not a string,
// nothing needs to be done
op = fRXPat->fCompiledPat->lastElementi();
op = (int32_t)fRXPat->fCompiledPat->lastElementi();
opType = URX_TYPE(op);
if (opType != URX_STRING_LEN) {
return;
@ -1942,7 +1942,7 @@ void RegexCompile::fixLiterals(UBool split) {
//
//------------------------------------------------------------------------------
void RegexCompile::insertOp(int32_t where) {
UVector32 *code = fRXPat->fCompiledPat;
UVector64 *code = fRXPat->fCompiledPat;
U_ASSERT(where>0 && where < code->size());
int32_t nop = URX_BUILD(URX_NOP, 0);
@ -1952,7 +1952,7 @@ void RegexCompile::insertOp(int32_t where) {
// were moved down by the insert. Fix them.
int32_t loc;
for (loc=0; loc<code->size(); loc++) {
int32_t op = code->elementAti(loc);
int32_t op = (int32_t)code->elementAti(loc);
int32_t opType = URX_TYPE(op);
int32_t opValue = URX_VAL(op);
if ((opType == URX_JMP ||
@ -2070,7 +2070,7 @@ void RegexCompile::handleCloseParen() {
break;
}
U_ASSERT(patIdx>0 && patIdx <= fRXPat->fCompiledPat->size());
patOp = fRXPat->fCompiledPat->elementAti(patIdx);
patOp = (int32_t)fRXPat->fCompiledPat->elementAti(patIdx);
U_ASSERT(URX_VAL(patOp) == 0); // Branch target for JMP should not be set.
patOp |= fRXPat->fCompiledPat->size(); // Set it now.
fRXPat->fCompiledPat->setElementAt(patOp, patIdx);
@ -2098,7 +2098,7 @@ void RegexCompile::handleCloseParen() {
// The frame offset of the variables for this cg is obtained from the
// start capture op and put it into the end-capture op.
{
int32_t captureOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen+1);
int32_t captureOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen+1);
U_ASSERT(URX_TYPE(captureOp) == URX_START_CAPTURE);
int32_t frameVarLocation = URX_VAL(captureOp);
@ -2111,7 +2111,7 @@ void RegexCompile::handleCloseParen() {
// Insert a LD_SP operation to restore the state stack to the position
// it was when the atomic parens were entered.
{
int32_t stoOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen+1);
int32_t stoOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen+1);
U_ASSERT(URX_TYPE(stoOp) == URX_STO_SP);
int32_t stoLoc = URX_VAL(stoOp);
int32_t ldOp = URX_BUILD(URX_LD_SP, stoLoc);
@ -2121,7 +2121,7 @@ void RegexCompile::handleCloseParen() {
case lookAhead:
{
int32_t startOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen-5);
int32_t startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen-5);
U_ASSERT(URX_TYPE(startOp) == URX_LA_START);
int32_t dataLoc = URX_VAL(startOp);
int32_t op = URX_BUILD(URX_LA_END, dataLoc);
@ -2132,7 +2132,7 @@ void RegexCompile::handleCloseParen() {
case negLookAhead:
{
// See comment at doOpenLookAheadNeg
int32_t startOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen-1);
int32_t startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen-1);
U_ASSERT(URX_TYPE(startOp) == URX_LA_START);
int32_t dataLoc = URX_VAL(startOp);
int32_t op = URX_BUILD(URX_LA_END, dataLoc);
@ -2144,7 +2144,7 @@ void RegexCompile::handleCloseParen() {
// Patch the URX_SAVE near the top of the block.
// The destination of the SAVE is the final LA_END that was just added.
int32_t saveOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen);
int32_t saveOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen);
U_ASSERT(URX_TYPE(saveOp) == URX_STATE_SAVE);
int32_t dest = fRXPat->fCompiledPat->size()-1;
saveOp = URX_BUILD(URX_STATE_SAVE, dest);
@ -2157,7 +2157,7 @@ void RegexCompile::handleCloseParen() {
// See comment at doOpenLookBehind.
// Append the URX_LB_END and URX_LA_END to the compiled pattern.
int32_t startOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen-4);
int32_t startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen-4);
U_ASSERT(URX_TYPE(startOp) == URX_LB_START);
int32_t dataLoc = URX_VAL(startOp);
int32_t op = URX_BUILD(URX_LB_END, dataLoc);
@ -2192,7 +2192,7 @@ void RegexCompile::handleCloseParen() {
// See comment at doOpenLookBehindNeg.
// Append the URX_LBN_END to the compiled pattern.
int32_t startOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen-5);
int32_t startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen-5);
U_ASSERT(URX_TYPE(startOp) == URX_LB_START);
int32_t dataLoc = URX_VAL(startOp);
int32_t op = URX_BUILD(URX_LBN_END, dataLoc);
@ -2373,7 +2373,7 @@ UBool RegexCompile::compileInlineInterval() {
// Pick up the opcode that is to be repeated
//
int32_t op = fRXPat->fCompiledPat->elementAti(topOfBlock);
int32_t op = (int32_t)fRXPat->fCompiledPat->elementAti(topOfBlock);
// Compute the pattern location where the inline sequence
// will end, and set up the state save op that will be needed.
@ -2446,7 +2446,7 @@ void RegexCompile::matchStartType() {
}
for (loc = 3; loc<end; loc++) {
op = fRXPat->fCompiledPat->elementAti(loc);
op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
opType = URX_TYPE(op);
// The loop is advancing linearly through the pattern.
@ -2685,7 +2685,7 @@ void RegexCompile::matchStartType() {
case URX_STRING:
{
loc++;
int32_t stringLenOp = fRXPat->fCompiledPat->elementAti(loc);
int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
int32_t stringLen = URX_VAL(stringLenOp);
U_ASSERT(URX_TYPE(stringLenOp) == URX_STRING_LEN);
U_ASSERT(stringLenOp >= 2);
@ -2714,7 +2714,7 @@ void RegexCompile::matchStartType() {
// attempt a string search for possible match positions. But we
// do update the set of possible starting characters.
loc++;
int32_t stringLenOp = fRXPat->fCompiledPat->elementAti(loc);
int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
int32_t stringLen = URX_VAL(stringLenOp);
U_ASSERT(URX_TYPE(stringLenOp) == URX_STRING_LEN);
U_ASSERT(stringLenOp >= 2);
@ -2743,9 +2743,9 @@ void RegexCompile::matchStartType() {
// move loc forwards to the end of the loop, skipping over the body.
// If the min count is > 0,
// continue normal processing of the body of the loop.
int32_t loopEndLoc = fRXPat->fCompiledPat->elementAti(loc+1);
int32_t loopEndLoc = (int32_t)fRXPat->fCompiledPat->elementAti(loc+1);
loopEndLoc = URX_VAL(loopEndLoc);
int32_t minLoopCount = fRXPat->fCompiledPat->elementAti(loc+2);
int32_t minLoopCount = (int32_t)fRXPat->fCompiledPat->elementAti(loc+2);
if (minLoopCount == 0) {
// Min Loop Count of 0, treat like a forward branch and
// move the current minimum length up to the target
@ -2787,7 +2787,7 @@ void RegexCompile::matchStartType() {
int32_t depth = (opType == URX_LA_START? 2: 1);
for (;;) {
loc++;
op = fRXPat->fCompiledPat->elementAti(loc);
op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
if (URX_TYPE(op) == URX_LA_START) {
depth+=2;
}
@ -2925,7 +2925,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
}
for (loc = start; loc<=end; loc++) {
op = fRXPat->fCompiledPat->elementAti(loc);
op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
opType = URX_TYPE(op);
// The loop is advancing linearly through the pattern.
@ -3034,7 +3034,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
case URX_STRING_I:
{
loc++;
int32_t stringLenOp = fRXPat->fCompiledPat->elementAti(loc);
int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
currentLen += URX_VAL(stringLenOp);
}
break;
@ -3048,9 +3048,9 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
// move loc forwards to the end of the loop, skipping over the body.
// If the min count is > 0,
// continue normal processing of the body of the loop.
int32_t loopEndLoc = fRXPat->fCompiledPat->elementAti(loc+1);
int32_t loopEndLoc = (int32_t)fRXPat->fCompiledPat->elementAti(loc+1);
loopEndLoc = URX_VAL(loopEndLoc);
int32_t minLoopCount = fRXPat->fCompiledPat->elementAti(loc+2);
int32_t minLoopCount = (int32_t)fRXPat->fCompiledPat->elementAti(loc+2);
if (minLoopCount == 0) {
loc = loopEndLoc;
} else {
@ -3085,7 +3085,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
int32_t depth = (opType == URX_LA_START? 2: 1);;
for (;;) {
loc++;
op = fRXPat->fCompiledPat->elementAti(loc);
op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
if (URX_TYPE(op) == URX_LA_START) {
// The boilerplate for look-ahead includes two LA_END instructions,
// Depth will be decremented by each one when it is seen.
@ -3179,7 +3179,7 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) {
}
for (loc = start; loc<=end; loc++) {
op = fRXPat->fCompiledPat->elementAti(loc);
op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
opType = URX_TYPE(op);
// The loop is advancing linearly through the pattern.
@ -3306,7 +3306,7 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) {
case URX_STRING_I:
{
loc++;
int32_t stringLenOp = fRXPat->fCompiledPat->elementAti(loc);
int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
currentLen += URX_VAL(stringLenOp);
}
break;
@ -3346,7 +3346,7 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) {
int32_t depth = 0;
for (;;) {
loc++;
op = fRXPat->fCompiledPat->elementAti(loc);
op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
if (URX_TYPE(op) == URX_LA_START || URX_TYPE(op) == URX_LB_START) {
depth++;
}
@ -3409,7 +3409,7 @@ void RegexCompile::stripNOPs() {
int32_t d = 0;
for (loc=0; loc<end; loc++) {
deltas.addElement(d, *fStatus);
int32_t op = fRXPat->fCompiledPat->elementAti(loc);
int32_t op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
if (URX_TYPE(op) == URX_NOP) {
d++;
}
@ -3425,7 +3425,7 @@ void RegexCompile::stripNOPs() {
int32_t src;
int32_t dst = 0;
for (src=0; src<end; src++) {
int32_t op = fRXPat->fCompiledPat->elementAti(src);
int32_t op = (int32_t)fRXPat->fCompiledPat->elementAti(src);
int32_t opType = URX_TYPE(op);
switch (opType) {
case URX_NOP:
@ -3468,7 +3468,7 @@ void RegexCompile::stripNOPs() {
op = URX_BUILD(URX_STRING_I, URX_VAL(op)+stringDelta);
src++;
int32_t lengthOp = fRXPat->fCompiledPat->elementAti(src);
int32_t lengthOp = (int32_t)fRXPat->fCompiledPat->elementAti(src);
caseStringBuffer.setTo(fRXPat->fLiteralText, URX_VAL(op), URX_VAL(lengthOp));
caseStringBuffer.foldCase(U_FOLD_CASE_DEFAULT);
@ -3578,8 +3578,20 @@ void RegexCompile::stripNOPs() {
void RegexCompile::error(UErrorCode e) {
if (U_SUCCESS(*fStatus)) {
*fStatus = e;
fParseErr->line = fLineNum;
fParseErr->offset = fCharNum;
// Hmm. fParseErr (UParseError) line & offset fields are int32_t in public
// API (see common/unicode/parseerr.h), while fLineNum and fCharNum are
// int64_t. If the values of the latter are out of range for the former,
// set them to the appropriate "field not supported" values.
if (fLineNum > 0x7FFFFFFF) {
fParseErr->line = 0;
fParseErr->offset = -1;
} else if (fCharNum > 0x7FFFFFFF) {
fParseErr->line = (int32_t)fLineNum;
fParseErr->offset = -1;
} else {
fParseErr->line = (int32_t)fLineNum;
fParseErr->offset = (int32_t)fCharNum;
}
UErrorCode status = U_ZERO_ERROR; // throwaway status for extracting context
@ -3752,8 +3764,8 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
c.fQuoted = TRUE;
if (UTEXT_FULL_TEXT_IN_CHUNK(fRXPat->fPattern, fPatternLength)) {
int32_t endIndex = pos;
c.fChar = u_unescapeAt(uregex_ucstr_unescape_charAt, &endIndex, fPatternLength, (void *)fRXPat->fPattern->chunkContents);
int32_t endIndex = (int32_t)pos;
c.fChar = u_unescapeAt(uregex_ucstr_unescape_charAt, &endIndex, (int32_t)fPatternLength, (void *)fRXPat->fPattern->chunkContents);
if (endIndex == pos) {
error(U_REGEX_BAD_ESCAPE_SEQUENCE);

View file

@ -279,13 +279,18 @@ enum {
// Match Engine State Stack Frame Layout.
//
struct REStackFrame {
// Header
int64_t fInputIdx; // Position of next character in the input string
int32_t fPatIdx; // Position of next Op in the compiled pattern
int32_t fExtra[2]; // Extra state, for capture group start/ends
int64_t fPatIdx; // Position of next Op in the compiled pattern
// (int64_t for UVector64, values fit in an int32_t)
// Remainder
int64_t fExtra[1]; // Extra state, for capture group start/ends
// atomic parentheses, repeat counts, etc.
// Locations assigned at pattern compile time.
// Note that this will likely end up longer than 64 bits.
// Variable-length array.
};
// number of UVector elements in the header
#define RESTACKFRAME_HDRCOUNT 2
//
// Start-Of-Match type. Used by find() to quickly scan to positions where a

File diff suppressed because it is too large Load diff

View file

@ -17,6 +17,7 @@
#include "uassert.h"
#include "uvector.h"
#include "uvectr32.h"
#include "uvectr64.h"
#include "regexcmp.h"
#include "regeximp.h"
#include "regexst.h"
@ -161,7 +162,7 @@ void RegexPattern::init() {
fPattern = NULL; // will be set later
fPatternString = NULL; // may be set later
fCompiledPat = new UVector32(fDeferredStatus);
fCompiledPat = new UVector64(fDeferredStatus);
fGroupMap = new UVector32(fDeferredStatus);
fSets = new UVector(fDeferredStatus);
fInitialChars = new UnicodeSet;

View file

@ -62,6 +62,7 @@ class RegexMatcher;
class RegexPattern;
class UVector;
class UVector32;
class UVector64;
class UnicodeSet;
struct REStackFrame;
struct Regex8BitSet;
@ -575,7 +576,7 @@ private:
UnicodeString *fPatternString; // The original pattern UncodeString if relevant
uint32_t fFlags; // The flags used when compiling the pattern.
//
UVector32 *fCompiledPat; // The compiled pattern p-code.
UVector64 *fCompiledPat; // The compiled pattern p-code.
UnicodeString fLiteralText; // Any literal string data from the pattern,
// after un-escaping, for use during the match.
@ -1595,7 +1596,7 @@ private:
UBool isWordBoundary(int64_t pos); // perform Perl-like \b test
UBool isUWordBoundary(int64_t pos); // perform RBBI based \b test
REStackFrame *resetStack();
inline REStackFrame *StateSave(REStackFrame *fp, int32_t savePatIdx, UErrorCode &status);
inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status);
void IncrementTime(UErrorCode &status);
int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const;
@ -1648,13 +1649,13 @@ private:
UBool fRequireEnd; // True if the last match required end-of-input
// (matched $ or Z)
UVector32 *fStack;
UVector64 *fStack;
REStackFrame *fFrame; // After finding a match, the last active stack frame,
// which will contain the capture group results.
// NOT valid while match engine is running.
int32_t *fData; // Data area for use by the compiled pattern.
int32_t fSmallData[8]; // Use this for data if it's enough.
int64_t *fData; // Data area for use by the compiled pattern.
int64_t fSmallData[8]; // Use this for data if it's enough.
int32_t fTimeLimit; // Max time (in arbitrary steps) to let the
// match engine run. Zero for unlimited.