mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-4521 Merge from branch, fixes for 64bit alignment & type conversions
X-SVN-Rev: 27670
This commit is contained in:
parent
63e3fc6df5
commit
411a93712a
10 changed files with 762 additions and 265 deletions
2
.gitattributes
vendored
2
.gitattributes
vendored
|
@ -49,6 +49,8 @@ README text !eol
|
|||
*.tri2 -text
|
||||
|
||||
icu4c/icu4c.css -text
|
||||
icu4c/source/common/uvectr64.cpp -text
|
||||
icu4c/source/common/uvectr64.h -text
|
||||
icu4c/source/data/curr/pool.res -text
|
||||
icu4c/source/data/in/nfc.nrm -text
|
||||
icu4c/source/data/in/nfkc.nrm -text
|
||||
|
|
|
@ -73,7 +73,7 @@ LIBS = $(LIBICUDT) $(DEFAULT_LIBS)
|
|||
|
||||
OBJECTS = errorcode.o putil.o umath.o utypes.o uinvchar.o umutex.o ucln_cmn.o uinit.o uobject.o cmemory.o \
|
||||
udata.o ucmndata.o udatamem.o umapfile.o udataswp.o ucol_swp.o utrace.o \
|
||||
uhash.o uhash_us.o uenum.o ustrenum.o uvector.o ustack.o uvectr32.o \
|
||||
uhash.o uhash_us.o uenum.o ustrenum.o uvector.o ustack.o uvectr32.o uvectr64.o \
|
||||
ucnv.o ucnv_bld.o ucnv_cnv.o ucnv_io.o ucnv_cb.o ucnv_err.o ucnvlat1.o \
|
||||
ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
|
||||
ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o \
|
||||
|
|
|
@ -986,6 +986,14 @@
|
|||
RelativePath=".\uvectr32.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\uvectr64.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\uvectr64.h"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="configuration"
|
||||
|
|
188
icu4c/source/common/uvectr64.cpp
Normal file
188
icu4c/source/common/uvectr64.cpp
Normal file
|
@ -0,0 +1,188 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 1999-2010, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "uvectr64.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
#define DEFAULT_CAPACITY 8
|
||||
|
||||
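/*
 * NOTE(review): the comment below appears to be carried over from another
 * vector class. This file defines no hint constants, and UVector64 stores
 * only int64_t values.
 *
 * Constants for hinting whether a key is an integer
 * or a pointer. If a hint bit is zero, then the associated
 * token is assumed to be an integer. This is needed for iSeries
 */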
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UVector64)
|
||||
|
||||
// Construct an empty vector using the default initial capacity.
// Storage is obtained by _init(), which reports allocation failure
// through 'status'.
UVector64::UVector64(UErrorCode &status)
    : count(0), capacity(0), maxCapacity(0), elements(NULL)
{
    _init(DEFAULT_CAPACITY, status);
}
|
||||
|
||||
// Construct an empty vector with room for 'initialCapacity' elements.
// Storage is obtained by _init(), which also sanitizes the requested
// capacity and reports allocation failure through 'status'.
UVector64::UVector64(int32_t initialCapacity, UErrorCode &status)
    : count(0), capacity(0), maxCapacity(0), elements(NULL)
{
    _init(initialCapacity, status);
}
|
||||
|
||||
|
||||
|
||||
void UVector64::_init(int32_t initialCapacity, UErrorCode &status) {
|
||||
// Fix bogus initialCapacity values; avoid malloc(0)
|
||||
if (initialCapacity < 1) {
|
||||
initialCapacity = DEFAULT_CAPACITY;
|
||||
}
|
||||
if (maxCapacity>0 && maxCapacity<initialCapacity) {
|
||||
initialCapacity = maxCapacity;
|
||||
}
|
||||
elements = (int64_t *)uprv_malloc(sizeof(int64_t)*initialCapacity);
|
||||
if (elements == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
} else {
|
||||
capacity = initialCapacity;
|
||||
}
|
||||
}
|
||||
|
||||
// Release the element storage and clear the pointer to it.
UVector64::~UVector64() {
    uprv_free(elements);
    elements = NULL;
}
|
||||
|
||||
/**
|
||||
* Assign this object to another (make this a copy of 'other').
|
||||
*/
|
||||
void UVector64::assign(const UVector64& other, UErrorCode &ec) {
|
||||
if (ensureCapacity(other.count, ec)) {
|
||||
setSize(other.count);
|
||||
for (int32_t i=0; i<other.count; ++i) {
|
||||
elements[i] = other.elements[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Two vectors are equal when they hold the same number of elements and
// every pair of corresponding elements compares equal.
UBool UVector64::operator==(const UVector64& other) {
    if (count != other.count) {
        return FALSE;
    }
    for (int32_t pos = 0; pos < count; ++pos) {
        if (elements[pos] != other.elements[pos]) {
            return FALSE;
        }
    }
    return TRUE;
}
|
||||
|
||||
|
||||
void UVector64::setElementAt(int64_t elem, int32_t index) {
|
||||
if (0 <= index && index < count) {
|
||||
elements[index] = elem;
|
||||
}
|
||||
/* else index out of range */
|
||||
}
|
||||
|
||||
void UVector64::insertElementAt(int64_t elem, int32_t index, UErrorCode &status) {
|
||||
// must have 0 <= index <= count
|
||||
if (0 <= index && index <= count && ensureCapacity(count + 1, status)) {
|
||||
for (int32_t i=count; i>index; --i) {
|
||||
elements[i] = elements[i-1];
|
||||
}
|
||||
elements[index] = elem;
|
||||
++count;
|
||||
}
|
||||
/* else index out of range */
|
||||
}
|
||||
|
||||
// Empty the vector. Only the element count is reset; the storage is kept.
void UVector64::removeAllElements() {
    count = 0;
}
|
||||
|
||||
// Grow the element storage so that it can hold at least minimumCapacity
// elements. This is the out-of-line slow path of ensureCapacity().
//
//   minimumCapacity  Number of elements that must fit after the call.
//   status           Set to U_BUFFER_OVERFLOW_ERROR if minimumCapacity exceeds
//                    a non-zero maxCapacity, or to U_MEMORY_ALLOCATION_ERROR if
//                    the storage cannot be reallocated.
//   returns          TRUE if the capacity is sufficient on return, otherwise
//                    FALSE. On failure the existing contents are untouched.
UBool UVector64::expandCapacity(int32_t minimumCapacity, UErrorCode &status) {
    if (capacity >= minimumCapacity) {
        return TRUE;
    }
    if (maxCapacity>0 && minimumCapacity>maxCapacity) {
        status = U_BUFFER_OVERFLOW_ERROR;
        return FALSE;
    }

    // Normally double the capacity, but saturate rather than overflow int32_t
    // (signed overflow is undefined behavior).
    int32_t newCap = (capacity > INT32_MAX / 2) ? INT32_MAX : capacity * 2;
    if (newCap < minimumCapacity) {
        newCap = minimumCapacity;
    }
    if (maxCapacity > 0 && newCap > maxCapacity) {
        newCap = maxCapacity;
    }

    // The byte count passed to realloc must not wrap around size_t; a wrapped
    // count would produce a buffer smaller than the capacity recorded below.
    // newCap and minimumCapacity are both > capacity >= 0 here, so the casts
    // to size_t are value preserving.
    const size_t maxElements = ((size_t)-1) / sizeof(int64_t);
    if ((size_t)newCap > maxElements) {
        if ((size_t)minimumCapacity > maxElements) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return FALSE;
        }
        // Doubling is too much; settle for exactly what was asked for.
        newCap = minimumCapacity;
    }

    int64_t* newElems = (int64_t *)uprv_realloc(elements, sizeof(int64_t)*newCap);
    if (newElems == NULL) {
        // We keep the original contents on the memory failure on realloc.
        status = U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }
    elements = newElems;
    capacity = newCap;
    return TRUE;
}
|
||||
|
||||
void UVector64::setMaxCapacity(int32_t limit) {
|
||||
U_ASSERT(limit >= 0);
|
||||
maxCapacity = limit;
|
||||
if (maxCapacity < 0) {
|
||||
maxCapacity = 0;
|
||||
}
|
||||
if (capacity <= maxCapacity || maxCapacity == 0) {
|
||||
// Current capacity is within the new limit.
|
||||
return;
|
||||
}
|
||||
|
||||
// New maximum capacity is smaller than the current size.
|
||||
// Realloc the storage to the new, smaller size.
|
||||
int64_t* newElems = (int64_t *)uprv_realloc(elements, sizeof(int64_t)*maxCapacity);
|
||||
if (newElems == NULL) {
|
||||
// Realloc to smaller failed.
|
||||
// Just keep what we had. No need to call it a failure.
|
||||
return;
|
||||
}
|
||||
elements = newElems;
|
||||
capacity = maxCapacity;
|
||||
if (count > capacity) {
|
||||
count = capacity;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the size of this vector as follows: If newSize is smaller,
|
||||
* then truncate the array, possibly deleting held elements for i >=
|
||||
* newSize. If newSize is larger, grow the array, filling in new
|
||||
* slots with NULL.
|
||||
*/
|
||||
void UVector64::setSize(int32_t newSize) {
|
||||
int32_t i;
|
||||
if (newSize < 0) {
|
||||
return;
|
||||
}
|
||||
if (newSize > count) {
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
if (!ensureCapacity(newSize, ec)) {
|
||||
return;
|
||||
}
|
||||
for (i=count; i<newSize; ++i) {
|
||||
elements[i] = 0;
|
||||
}
|
||||
}
|
||||
count = newSize;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
277
icu4c/source/common/uvectr64.h
Normal file
277
icu4c/source/common/uvectr64.h
Normal file
|
@ -0,0 +1,277 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
//
|
||||
// UVector64 is a class implementing a vector of 64 bit integers.
|
||||
// It is similar to UVector32, but holds int64_t values rather than int32_t.
|
||||
// Most of the code is unchanged from UVector.
|
||||
//
|
||||
|
||||
#ifndef UVECTOR64_H
|
||||
#define UVECTOR64_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "uhash.h"
|
||||
#include "uassert.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* <p>Ultralightweight C++ implementation of an <tt>int64_t</tt> vector
|
||||
* that has a subset of methods from UVector32
|
||||
*
|
||||
* <p>This is a very simple implementation, written to satisfy an
|
||||
* immediate porting need. As such, it is not completely fleshed out,
|
||||
* and it aims for simplicity and conformity. Nonetheless, it serves
|
||||
* its purpose (porting code from java that uses java.util.Vector)
|
||||
* well, and it could be easily made into a more robust vector class.
|
||||
*
|
||||
* <p><b>Design notes</b>
|
||||
*
|
||||
* <p>There is index bounds checking, but little is done about it. If
|
||||
* indices are out of bounds, either nothing happens, or zero is
|
||||
* returned. We <em>do</em> avoid indexing off into the weeds.
|
||||
*
|
||||
* <p>There is detection of out of memory, but the handling is very
|
||||
* coarse-grained -- similar to UnicodeString's protocol, but even
|
||||
* coarser. The class contains <em>one static flag</em> that is set
|
||||
* when any call to <tt>new</tt> returns zero. This allows the caller
|
||||
* to use several vectors and make just one check at the end to see if
|
||||
* a memory failure occurred. This is more efficient than making a
|
||||
* check after each call on each vector when doing many operations on
|
||||
* multiple vectors. The single static flag works best when memory
|
||||
* failures are infrequent, and when recovery options are limited or
|
||||
* nonexistent.
|
||||
*
|
||||
* <p><b>To do</b>
|
||||
*
|
||||
* <p>Improve the handling of index out of bounds errors.
|
||||
*
|
||||
*/
|
||||
class U_COMMON_API UVector64 : public UObject {
|
||||
private:
|
||||
int32_t count;
|
||||
|
||||
int32_t capacity;
|
||||
|
||||
int32_t maxCapacity; // Limit beyond which capacity is not permitted to grow.
|
||||
|
||||
int64_t* elements;
|
||||
|
||||
public:
|
||||
UVector64(UErrorCode &status);
|
||||
|
||||
UVector64(int32_t initialCapacity, UErrorCode &status);
|
||||
|
||||
virtual ~UVector64();
|
||||
|
||||
/**
|
||||
* Assign this object to another (make this a copy of 'other').
|
||||
* Use the 'assign' function to assign each element.
|
||||
*/
|
||||
void assign(const UVector64& other, UErrorCode &ec);
|
||||
|
||||
/**
|
||||
* Compare this vector with another. They will be considered
|
||||
* equal if they are of the same size and all elements are equal,
|
||||
* as compared using this object's comparer.
|
||||
*/
|
||||
UBool operator==(const UVector64& other);
|
||||
|
||||
/**
|
||||
* Equivalent to !operator==()
|
||||
*/
|
||||
inline UBool operator!=(const UVector64& other);
|
||||
|
||||
//------------------------------------------------------------
|
||||
// subset of java.util.Vector API
|
||||
//------------------------------------------------------------
|
||||
|
||||
void addElement(int64_t elem, UErrorCode &status);
|
||||
|
||||
void setElementAt(int64_t elem, int32_t index);
|
||||
|
||||
void insertElementAt(int64_t elem, int32_t index, UErrorCode &status);
|
||||
|
||||
int64_t elementAti(int32_t index) const;
|
||||
|
||||
//UBool equals(const UVector64 &other) const;
|
||||
|
||||
int64_t lastElementi(void) const;
|
||||
|
||||
//int32_t indexOf(int64_t elem, int32_t startIndex = 0) const;
|
||||
|
||||
//UBool contains(int64_t elem) const;
|
||||
|
||||
//UBool containsAll(const UVector64& other) const;
|
||||
|
||||
//UBool removeAll(const UVector64& other);
|
||||
|
||||
//UBool retainAll(const UVector64& other);
|
||||
|
||||
//void removeElementAt(int32_t index);
|
||||
|
||||
void removeAllElements();
|
||||
|
||||
int32_t size(void) const;
|
||||
|
||||
//UBool isEmpty(void) const;
|
||||
|
||||
// Inline. Use this one for speedy size check.
|
||||
inline UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status);
|
||||
|
||||
// Out-of-line, handles actual growth. Called by ensureCapacity() when necessary.
|
||||
UBool expandCapacity(int32_t minimumCapacity, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Change the size of this vector as follows: If newSize is
|
||||
* smaller, then truncate the array, possibly deleting held
|
||||
* elements for i >= newSize. If newSize is larger, grow the
|
||||
* array, filling in new slows with zero.
|
||||
*/
|
||||
void setSize(int32_t newSize);
|
||||
|
||||
//------------------------------------------------------------
|
||||
// New API
|
||||
//------------------------------------------------------------
|
||||
|
||||
//UBool containsNone(const UVector64& other) const;
|
||||
|
||||
|
||||
//void sortedInsert(int64_t elem, UErrorCode& ec);
|
||||
|
||||
/**
|
||||
* Returns a pointer to the internal array holding the vector.
|
||||
*/
|
||||
int64_t *getBuffer() const;
|
||||
|
||||
/**
|
||||
* Set the maximum allowed buffer capacity for this vector/stack.
|
||||
* Default with no limit set is unlimited, go until malloc() fails.
|
||||
* A Limit of zero means unlimited capacity.
|
||||
* Units are vector elements (64 bits each), not bytes.
|
||||
*/
|
||||
void setMaxCapacity(int32_t limit);
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for this class.
|
||||
*/
|
||||
static UClassID U_EXPORT2 getStaticClassID();
|
||||
|
||||
/**
|
||||
* ICU "poor man's RTTI", returns a UClassID for the actual class.
|
||||
*/
|
||||
virtual UClassID getDynamicClassID() const;
|
||||
|
||||
private:
|
||||
void _init(int32_t initialCapacity, UErrorCode &status);
|
||||
|
||||
// Disallow
|
||||
UVector64(const UVector64&);
|
||||
|
||||
// Disallow
|
||||
UVector64& operator=(const UVector64&);
|
||||
|
||||
|
||||
// API Functions for Stack operations.
|
||||
// In the original UVector, these were in a separate derived class, UStack.
|
||||
// Here in UVector64, they are all together.
|
||||
public:
|
||||
//UBool empty(void) const; // TODO: redundant, same as empty(). Remove it?
|
||||
|
||||
//int64_t peeki(void) const;
|
||||
|
||||
int64_t popi(void);
|
||||
|
||||
int64_t push(int64_t i, UErrorCode &status);
|
||||
|
||||
int64_t *reserveBlock(int32_t size, UErrorCode &status);
|
||||
int64_t *popFrame(int32_t size);
|
||||
};
|
||||
|
||||
|
||||
// UVector64 inlines
|
||||
|
||||
// Fast inline check that the storage can hold minimumCapacity elements.
// Falls through to the out-of-line expandCapacity() only when it cannot.
inline UBool UVector64::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) {
    if (capacity < minimumCapacity) {
        return expandCapacity(minimumCapacity, status);
    }
    return TRUE;
}
|
||||
|
||||
inline int64_t UVector64::elementAti(int32_t index) const {
|
||||
return (0 <= index && index < count) ? elements[index] : 0;
|
||||
}
|
||||
|
||||
|
||||
inline void UVector64::addElement(int64_t elem, UErrorCode &status) {
|
||||
if (ensureCapacity(count + 1, status)) {
|
||||
elements[count] = elem;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
inline int64_t *UVector64::reserveBlock(int32_t size, UErrorCode &status) {
|
||||
if (ensureCapacity(count+size, status) == FALSE) {
|
||||
return NULL;
|
||||
}
|
||||
int64_t *rp = elements+count;
|
||||
count += size;
|
||||
return rp;
|
||||
}
|
||||
|
||||
inline int64_t *UVector64::popFrame(int32_t size) {
|
||||
U_ASSERT(count >= size);
|
||||
count -= size;
|
||||
if (count < 0) {
|
||||
count = 0;
|
||||
}
|
||||
return elements+count-size;
|
||||
}
|
||||
|
||||
|
||||
|
||||
inline int32_t UVector64::size(void) const {
|
||||
return count;
|
||||
}
|
||||
|
||||
inline int64_t UVector64::lastElementi(void) const {
|
||||
return elementAti(count-1);
|
||||
}
|
||||
|
||||
// Logical negation of operator==().
inline UBool UVector64::operator!=(const UVector64& other) {
    return !(*this == other);
}
|
||||
|
||||
inline int64_t *UVector64::getBuffer() const {
|
||||
return elements;
|
||||
}
|
||||
|
||||
|
||||
// UStack inlines
|
||||
|
||||
// Stack push: append 'i' via addElement(). The pushed value is returned
// whether or not the append succeeded; check 'status' for failure.
inline int64_t UVector64::push(int64_t i, UErrorCode &status) {
    addElement(i, status);
    return i;
}
|
||||
|
||||
inline int64_t UVector64::popi(void) {
|
||||
int64_t result = 0;
|
||||
if (count > 0) {
|
||||
count--;
|
||||
result = elements[count];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
|
@ -26,6 +26,7 @@
|
|||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "uvectr32.h"
|
||||
#include "uvectr64.h"
|
||||
#include "uassert.h"
|
||||
#include "ucln_in.h"
|
||||
#include "uinvchar.h"
|
||||
|
@ -292,7 +293,7 @@ void RegexCompile::compile(
|
|||
// present in the saved state: the input string position (int64_t) and
|
||||
// the position in the compiled pattern.
|
||||
//
|
||||
fRXPat->fFrameSize+=3;
|
||||
fRXPat->fFrameSize+=RESTACKFRAME_HDRCOUNT;
|
||||
|
||||
//
|
||||
// Optimization pass 1: NOPs, back-references, and case-folding
|
||||
|
@ -400,7 +401,7 @@ UBool RegexCompile::doParseActions(int32_t action)
|
|||
// side fails to match and backtracks. Locate the position for the
|
||||
// save from the location on the top of the parentheses stack.
|
||||
int32_t savePosition = fParenStack.popi();
|
||||
int32_t op = fRXPat->fCompiledPat->elementAti(savePosition);
|
||||
int32_t op = (int32_t)fRXPat->fCompiledPat->elementAti(savePosition);
|
||||
U_ASSERT(URX_TYPE(op) == URX_NOP); // original contents of reserved location
|
||||
op = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+1);
|
||||
fRXPat->fCompiledPat->setElementAt(op, savePosition);
|
||||
|
@ -433,10 +434,10 @@ UBool RegexCompile::doParseActions(int32_t action)
|
|||
// - NOP, which may later be replaced by a save-state if there
|
||||
// is an '|' alternation within the parens.
|
||||
//
|
||||
// Each capture group gets three double-width slots in the save stack frame:
|
||||
// 0-1: Capture Group start position (in input string being matched.)
|
||||
// 2-3: Capture Group end position.
|
||||
// 4-5: Start of Match-in-progress.
|
||||
// Each capture group gets three slots in the save stack frame:
|
||||
// 0: Capture Group start position (in input string being matched.)
|
||||
// 1: Capture Group end position.
|
||||
// 2: Start of Match-in-progress.
|
||||
// The first two locations are for a completed capture group, and are
|
||||
// referred to by back references and the like.
|
||||
// The third location stores the capture start position when an START_CAPTURE is
|
||||
|
@ -444,8 +445,8 @@ UBool RegexCompile::doParseActions(int32_t action)
|
|||
// END_CAPTURE is encountered.
|
||||
{
|
||||
fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
|
||||
int32_t varsLoc = fRXPat->fFrameSize; // Reserve five slots in match stack frame.
|
||||
fRXPat->fFrameSize += 6;
|
||||
int32_t varsLoc = fRXPat->fFrameSize; // Reserve three slots in match stack frame.
|
||||
fRXPat->fFrameSize += 3;
|
||||
int32_t cop = URX_BUILD(URX_START_CAPTURE, varsLoc);
|
||||
fRXPat->fCompiledPat->addElement(cop, *fStatus);
|
||||
fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
|
||||
|
@ -539,10 +540,10 @@ UBool RegexCompile::doParseActions(int32_t action)
|
|||
// 8. code for parenthesized stuff.
|
||||
// 9. LA_END
|
||||
//
|
||||
// Three data slots are reserved, for saving the stack ptr and the (double-width) input position.
|
||||
// Two data slots are reserved, for saving the stack ptr and the input position.
|
||||
{
|
||||
int32_t dataLoc = fRXPat->fDataSize;
|
||||
fRXPat->fDataSize += 3;
|
||||
fRXPat->fDataSize += 2;
|
||||
int32_t op = URX_BUILD(URX_LA_START, dataLoc);
|
||||
fRXPat->fCompiledPat->addElement(op, *fStatus);
|
||||
|
||||
|
@ -583,10 +584,9 @@ UBool RegexCompile::doParseActions(int32_t action)
|
|||
// 6. BACKTRACK // code in block succeeded, so neg. lookahead fails.
|
||||
// 7. END_LA // Restore match region, in case look-ahead was using
|
||||
// an alternate (transparent) region.
|
||||
// Three data slots are reserved, for saving the stack ptr and the (double-width) input position.
|
||||
{
|
||||
int32_t dataLoc = fRXPat->fDataSize;
|
||||
fRXPat->fDataSize += 3;
|
||||
fRXPat->fDataSize += 2;
|
||||
int32_t op = URX_BUILD(URX_LA_START, dataLoc);
|
||||
fRXPat->fCompiledPat->addElement(op, *fStatus);
|
||||
|
||||
|
@ -625,12 +625,12 @@ UBool RegexCompile::doParseActions(int32_t action)
|
|||
// Allocate a block of matcher data, to contain (when running a match)
|
||||
// 0: Stack ptr on entry
|
||||
// 1: Input Index on entry
|
||||
// 2-3: Start index of match current match attempt.
|
||||
// 4-5: Original Input String len.
|
||||
// 2: Start index of match current match attempt.
|
||||
// 3: Original Input String len.
|
||||
|
||||
// Allocate data space
|
||||
int32_t dataLoc = fRXPat->fDataSize;
|
||||
fRXPat->fDataSize += 6;
|
||||
fRXPat->fDataSize += 4;
|
||||
|
||||
// Emit URX_LB_START
|
||||
int32_t op = URX_BUILD(URX_LB_START, dataLoc);
|
||||
|
@ -678,12 +678,12 @@ UBool RegexCompile::doParseActions(int32_t action)
|
|||
// Allocate a block of matcher data, to contain (when running a match)
|
||||
// 0: Stack ptr on entry
|
||||
// 1: Input Index on entry
|
||||
// 2-3: Start index of match current match attempt.
|
||||
// 4-5: Original Input String len.
|
||||
// 2: Start index of match current match attempt.
|
||||
// 3: Original Input String len.
|
||||
|
||||
// Allocate data space
|
||||
int32_t dataLoc = fRXPat->fDataSize;
|
||||
fRXPat->fDataSize += 6;
|
||||
fRXPat->fDataSize += 4;
|
||||
|
||||
// Emit URX_LB_START
|
||||
int32_t op = URX_BUILD(URX_LB_START, dataLoc);
|
||||
|
@ -765,14 +765,14 @@ UBool RegexCompile::doParseActions(int32_t action)
|
|||
|
||||
// Check for simple constructs, which may get special optimized code.
|
||||
if (topLoc == fRXPat->fCompiledPat->size() - 1) {
|
||||
int32_t repeatedOp = fRXPat->fCompiledPat->elementAti(topLoc);
|
||||
int32_t repeatedOp = (int32_t)fRXPat->fCompiledPat->elementAti(topLoc);
|
||||
|
||||
if (URX_TYPE(repeatedOp) == URX_SETREF) {
|
||||
// Emit optimized code for [char set]+
|
||||
int32_t loopOpI = URX_BUILD(URX_LOOP_SR_I, URX_VAL(repeatedOp));
|
||||
fRXPat->fCompiledPat->addElement(loopOpI, *fStatus);
|
||||
frameLoc = fRXPat->fFrameSize;
|
||||
fRXPat->fFrameSize += 2; // double-width index
|
||||
fRXPat->fFrameSize++;
|
||||
int32_t loopOpC = URX_BUILD(URX_LOOP_C, frameLoc);
|
||||
fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
|
||||
break;
|
||||
|
@ -792,7 +792,7 @@ UBool RegexCompile::doParseActions(int32_t action)
|
|||
}
|
||||
fRXPat->fCompiledPat->addElement(loopOpI, *fStatus);
|
||||
frameLoc = fRXPat->fFrameSize;
|
||||
fRXPat->fFrameSize += 2; // double-width index
|
||||
fRXPat->fFrameSize++;
|
||||
int32_t loopOpC = URX_BUILD(URX_LOOP_C, frameLoc);
|
||||
fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
|
||||
break;
|
||||
|
@ -809,7 +809,7 @@ UBool RegexCompile::doParseActions(int32_t action)
|
|||
// Emit the code sequence that can handle it.
|
||||
insertOp(topLoc);
|
||||
frameLoc = fRXPat->fFrameSize;
|
||||
fRXPat->fFrameSize += 2; // double-width index
|
||||
fRXPat->fFrameSize++;
|
||||
|
||||
int32_t op = URX_BUILD(URX_STO_INP_LOC, frameLoc);
|
||||
fRXPat->fCompiledPat->setElementAt(op, topLoc);
|
||||
|
@ -908,14 +908,14 @@ UBool RegexCompile::doParseActions(int32_t action)
|
|||
// Check for simple *, where the construct being repeated
|
||||
// compiled to single opcode, and might be optimizable.
|
||||
if (topLoc == fRXPat->fCompiledPat->size() - 1) {
|
||||
int32_t repeatedOp = fRXPat->fCompiledPat->elementAti(topLoc);
|
||||
int32_t repeatedOp = (int32_t)fRXPat->fCompiledPat->elementAti(topLoc);
|
||||
|
||||
if (URX_TYPE(repeatedOp) == URX_SETREF) {
|
||||
// Emit optimized code for a [char set]*
|
||||
int32_t loopOpI = URX_BUILD(URX_LOOP_SR_I, URX_VAL(repeatedOp));
|
||||
fRXPat->fCompiledPat->setElementAt(loopOpI, topLoc);
|
||||
dataLoc = fRXPat->fFrameSize;
|
||||
fRXPat->fFrameSize += 2; // double-width index
|
||||
fRXPat->fFrameSize++;
|
||||
int32_t loopOpC = URX_BUILD(URX_LOOP_C, dataLoc);
|
||||
fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
|
||||
break;
|
||||
|
@ -935,7 +935,7 @@ UBool RegexCompile::doParseActions(int32_t action)
|
|||
}
|
||||
fRXPat->fCompiledPat->setElementAt(loopOpI, topLoc);
|
||||
dataLoc = fRXPat->fFrameSize;
|
||||
fRXPat->fFrameSize += 2; // double-width index
|
||||
fRXPat->fFrameSize++;
|
||||
int32_t loopOpC = URX_BUILD(URX_LOOP_C, dataLoc);
|
||||
fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
|
||||
break;
|
||||
|
@ -953,7 +953,7 @@ UBool RegexCompile::doParseActions(int32_t action)
|
|||
if (minMatchLength(saveStateLoc, fRXPat->fCompiledPat->size()-1) == 0) {
|
||||
insertOp(saveStateLoc);
|
||||
dataLoc = fRXPat->fFrameSize;
|
||||
fRXPat->fFrameSize += 2; // double-width index
|
||||
fRXPat->fFrameSize++;
|
||||
|
||||
int32_t op = URX_BUILD(URX_STO_INP_LOC, dataLoc);
|
||||
fRXPat->fCompiledPat->setElementAt(op, saveStateLoc+1);
|
||||
|
@ -1060,7 +1060,7 @@ UBool RegexCompile::doParseActions(int32_t action)
|
|||
int32_t op = URX_BUILD(URX_STO_SP, varLoc);
|
||||
fRXPat->fCompiledPat->setElementAt(op, topLoc);
|
||||
|
||||
int32_t loopOp = fRXPat->fCompiledPat->popi();
|
||||
int32_t loopOp = (int32_t)fRXPat->fCompiledPat->popi();
|
||||
U_ASSERT(URX_TYPE(loopOp) == URX_CTR_LOOP && URX_VAL(loopOp) == topLoc);
|
||||
loopOp++; // point LoopOp after the just-inserted STO_SP
|
||||
fRXPat->fCompiledPat->push(loopOp, *fStatus);
|
||||
|
@ -1768,7 +1768,7 @@ void RegexCompile::literalChar(UChar32 c) {
|
|||
|
||||
// If the last thing compiled into the pattern was not a literal char,
|
||||
// force this new literal char to begin a new string, and not append to the previous.
|
||||
op = fRXPat->fCompiledPat->lastElementi();
|
||||
op = (int32_t)fRXPat->fCompiledPat->lastElementi();
|
||||
opType = URX_TYPE(op);
|
||||
if (!(opType == URX_STRING_LEN || opType == URX_ONECHAR || opType == URX_ONECHAR_I)) {
|
||||
fixLiterals();
|
||||
|
@ -1784,7 +1784,7 @@ void RegexCompile::literalChar(UChar32 c) {
|
|||
return;
|
||||
}
|
||||
|
||||
op = fRXPat->fCompiledPat->lastElementi();
|
||||
op = (int32_t)fRXPat->fCompiledPat->lastElementi();
|
||||
opType = URX_TYPE(op);
|
||||
U_ASSERT(opType == URX_ONECHAR || opType == URX_ONECHAR_I || opType == URX_STRING_LEN);
|
||||
|
||||
|
@ -1888,7 +1888,7 @@ void RegexCompile::fixLiterals(UBool split) {
|
|||
|
||||
// If the last operation from the compiled pattern is not a string,
|
||||
// nothing needs to be done
|
||||
op = fRXPat->fCompiledPat->lastElementi();
|
||||
op = (int32_t)fRXPat->fCompiledPat->lastElementi();
|
||||
opType = URX_TYPE(op);
|
||||
if (opType != URX_STRING_LEN) {
|
||||
return;
|
||||
|
@ -1942,7 +1942,7 @@ void RegexCompile::fixLiterals(UBool split) {
|
|||
//
|
||||
//------------------------------------------------------------------------------
|
||||
void RegexCompile::insertOp(int32_t where) {
|
||||
UVector32 *code = fRXPat->fCompiledPat;
|
||||
UVector64 *code = fRXPat->fCompiledPat;
|
||||
U_ASSERT(where>0 && where < code->size());
|
||||
|
||||
int32_t nop = URX_BUILD(URX_NOP, 0);
|
||||
|
@ -1952,7 +1952,7 @@ void RegexCompile::insertOp(int32_t where) {
|
|||
// were moved down by the insert. Fix them.
|
||||
int32_t loc;
|
||||
for (loc=0; loc<code->size(); loc++) {
|
||||
int32_t op = code->elementAti(loc);
|
||||
int32_t op = (int32_t)code->elementAti(loc);
|
||||
int32_t opType = URX_TYPE(op);
|
||||
int32_t opValue = URX_VAL(op);
|
||||
if ((opType == URX_JMP ||
|
||||
|
@ -2070,7 +2070,7 @@ void RegexCompile::handleCloseParen() {
|
|||
break;
|
||||
}
|
||||
U_ASSERT(patIdx>0 && patIdx <= fRXPat->fCompiledPat->size());
|
||||
patOp = fRXPat->fCompiledPat->elementAti(patIdx);
|
||||
patOp = (int32_t)fRXPat->fCompiledPat->elementAti(patIdx);
|
||||
U_ASSERT(URX_VAL(patOp) == 0); // Branch target for JMP should not be set.
|
||||
patOp |= fRXPat->fCompiledPat->size(); // Set it now.
|
||||
fRXPat->fCompiledPat->setElementAt(patOp, patIdx);
|
||||
|
@ -2098,7 +2098,7 @@ void RegexCompile::handleCloseParen() {
|
|||
// The frame offset of the variables for this cg is obtained from the
|
||||
// start capture op and put it into the end-capture op.
|
||||
{
|
||||
int32_t captureOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen+1);
|
||||
int32_t captureOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen+1);
|
||||
U_ASSERT(URX_TYPE(captureOp) == URX_START_CAPTURE);
|
||||
|
||||
int32_t frameVarLocation = URX_VAL(captureOp);
|
||||
|
@ -2111,7 +2111,7 @@ void RegexCompile::handleCloseParen() {
|
|||
// Insert a LD_SP operation to restore the state stack to the position
|
||||
// it was when the atomic parens were entered.
|
||||
{
|
||||
int32_t stoOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen+1);
|
||||
int32_t stoOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen+1);
|
||||
U_ASSERT(URX_TYPE(stoOp) == URX_STO_SP);
|
||||
int32_t stoLoc = URX_VAL(stoOp);
|
||||
int32_t ldOp = URX_BUILD(URX_LD_SP, stoLoc);
|
||||
|
@ -2121,7 +2121,7 @@ void RegexCompile::handleCloseParen() {
|
|||
|
||||
case lookAhead:
|
||||
{
|
||||
int32_t startOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen-5);
|
||||
int32_t startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen-5);
|
||||
U_ASSERT(URX_TYPE(startOp) == URX_LA_START);
|
||||
int32_t dataLoc = URX_VAL(startOp);
|
||||
int32_t op = URX_BUILD(URX_LA_END, dataLoc);
|
||||
|
@ -2132,7 +2132,7 @@ void RegexCompile::handleCloseParen() {
|
|||
case negLookAhead:
|
||||
{
|
||||
// See comment at doOpenLookAheadNeg
|
||||
int32_t startOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen-1);
|
||||
int32_t startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen-1);
|
||||
U_ASSERT(URX_TYPE(startOp) == URX_LA_START);
|
||||
int32_t dataLoc = URX_VAL(startOp);
|
||||
int32_t op = URX_BUILD(URX_LA_END, dataLoc);
|
||||
|
@ -2144,7 +2144,7 @@ void RegexCompile::handleCloseParen() {
|
|||
|
||||
// Patch the URX_SAVE near the top of the block.
|
||||
// The destination of the SAVE is the final LA_END that was just added.
|
||||
int32_t saveOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen);
|
||||
int32_t saveOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen);
|
||||
U_ASSERT(URX_TYPE(saveOp) == URX_STATE_SAVE);
|
||||
int32_t dest = fRXPat->fCompiledPat->size()-1;
|
||||
saveOp = URX_BUILD(URX_STATE_SAVE, dest);
|
||||
|
@ -2157,7 +2157,7 @@ void RegexCompile::handleCloseParen() {
|
|||
// See comment at doOpenLookBehind.
|
||||
|
||||
// Append the URX_LB_END and URX_LA_END to the compiled pattern.
|
||||
int32_t startOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen-4);
|
||||
int32_t startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen-4);
|
||||
U_ASSERT(URX_TYPE(startOp) == URX_LB_START);
|
||||
int32_t dataLoc = URX_VAL(startOp);
|
||||
int32_t op = URX_BUILD(URX_LB_END, dataLoc);
|
||||
|
@ -2192,7 +2192,7 @@ void RegexCompile::handleCloseParen() {
|
|||
// See comment at doOpenLookBehindNeg.
|
||||
|
||||
// Append the URX_LBN_END to the compiled pattern.
|
||||
int32_t startOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen-5);
|
||||
int32_t startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen-5);
|
||||
U_ASSERT(URX_TYPE(startOp) == URX_LB_START);
|
||||
int32_t dataLoc = URX_VAL(startOp);
|
||||
int32_t op = URX_BUILD(URX_LBN_END, dataLoc);
|
||||
|
@ -2373,7 +2373,7 @@ UBool RegexCompile::compileInlineInterval() {
|
|||
|
||||
// Pick up the opcode that is to be repeated
|
||||
//
|
||||
int32_t op = fRXPat->fCompiledPat->elementAti(topOfBlock);
|
||||
int32_t op = (int32_t)fRXPat->fCompiledPat->elementAti(topOfBlock);
|
||||
|
||||
// Compute the pattern location where the inline sequence
|
||||
// will end, and set up the state save op that will be needed.
|
||||
|
@ -2446,7 +2446,7 @@ void RegexCompile::matchStartType() {
|
|||
}
|
||||
|
||||
for (loc = 3; loc<end; loc++) {
|
||||
op = fRXPat->fCompiledPat->elementAti(loc);
|
||||
op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
|
||||
opType = URX_TYPE(op);
|
||||
|
||||
// The loop is advancing linearly through the pattern.
|
||||
|
@ -2685,7 +2685,7 @@ void RegexCompile::matchStartType() {
|
|||
case URX_STRING:
|
||||
{
|
||||
loc++;
|
||||
int32_t stringLenOp = fRXPat->fCompiledPat->elementAti(loc);
|
||||
int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
|
||||
int32_t stringLen = URX_VAL(stringLenOp);
|
||||
U_ASSERT(URX_TYPE(stringLenOp) == URX_STRING_LEN);
|
||||
U_ASSERT(stringLenOp >= 2);
|
||||
|
@ -2714,7 +2714,7 @@ void RegexCompile::matchStartType() {
|
|||
// attempt a string search for possible match positions. But we
|
||||
// do update the set of possible starting characters.
|
||||
loc++;
|
||||
int32_t stringLenOp = fRXPat->fCompiledPat->elementAti(loc);
|
||||
int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
|
||||
int32_t stringLen = URX_VAL(stringLenOp);
|
||||
U_ASSERT(URX_TYPE(stringLenOp) == URX_STRING_LEN);
|
||||
U_ASSERT(stringLenOp >= 2);
|
||||
|
@ -2743,9 +2743,9 @@ void RegexCompile::matchStartType() {
|
|||
// move loc forwards to the end of the loop, skipping over the body.
|
||||
// If the min count is > 0,
|
||||
// continue normal processing of the body of the loop.
|
||||
int32_t loopEndLoc = fRXPat->fCompiledPat->elementAti(loc+1);
|
||||
int32_t loopEndLoc = (int32_t)fRXPat->fCompiledPat->elementAti(loc+1);
|
||||
loopEndLoc = URX_VAL(loopEndLoc);
|
||||
int32_t minLoopCount = fRXPat->fCompiledPat->elementAti(loc+2);
|
||||
int32_t minLoopCount = (int32_t)fRXPat->fCompiledPat->elementAti(loc+2);
|
||||
if (minLoopCount == 0) {
|
||||
// Min Loop Count of 0, treat like a forward branch and
|
||||
// move the current minimum length up to the target
|
||||
|
@ -2787,7 +2787,7 @@ void RegexCompile::matchStartType() {
|
|||
int32_t depth = (opType == URX_LA_START? 2: 1);
|
||||
for (;;) {
|
||||
loc++;
|
||||
op = fRXPat->fCompiledPat->elementAti(loc);
|
||||
op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
|
||||
if (URX_TYPE(op) == URX_LA_START) {
|
||||
depth+=2;
|
||||
}
|
||||
|
@ -2925,7 +2925,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
|
|||
}
|
||||
|
||||
for (loc = start; loc<=end; loc++) {
|
||||
op = fRXPat->fCompiledPat->elementAti(loc);
|
||||
op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
|
||||
opType = URX_TYPE(op);
|
||||
|
||||
// The loop is advancing linearly through the pattern.
|
||||
|
@ -3034,7 +3034,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
|
|||
case URX_STRING_I:
|
||||
{
|
||||
loc++;
|
||||
int32_t stringLenOp = fRXPat->fCompiledPat->elementAti(loc);
|
||||
int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
|
||||
currentLen += URX_VAL(stringLenOp);
|
||||
}
|
||||
break;
|
||||
|
@ -3048,9 +3048,9 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
|
|||
// move loc forwards to the end of the loop, skipping over the body.
|
||||
// If the min count is > 0,
|
||||
// continue normal processing of the body of the loop.
|
||||
int32_t loopEndLoc = fRXPat->fCompiledPat->elementAti(loc+1);
|
||||
int32_t loopEndLoc = (int32_t)fRXPat->fCompiledPat->elementAti(loc+1);
|
||||
loopEndLoc = URX_VAL(loopEndLoc);
|
||||
int32_t minLoopCount = fRXPat->fCompiledPat->elementAti(loc+2);
|
||||
int32_t minLoopCount = (int32_t)fRXPat->fCompiledPat->elementAti(loc+2);
|
||||
if (minLoopCount == 0) {
|
||||
loc = loopEndLoc;
|
||||
} else {
|
||||
|
@ -3085,7 +3085,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
|
|||
int32_t depth = (opType == URX_LA_START? 2: 1);;
|
||||
for (;;) {
|
||||
loc++;
|
||||
op = fRXPat->fCompiledPat->elementAti(loc);
|
||||
op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
|
||||
if (URX_TYPE(op) == URX_LA_START) {
|
||||
// The boilerplate for look-ahead includes two LA_END instructions,
|
||||
// Depth will be decremented by each one when it is seen.
|
||||
|
@ -3179,7 +3179,7 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) {
|
|||
}
|
||||
|
||||
for (loc = start; loc<=end; loc++) {
|
||||
op = fRXPat->fCompiledPat->elementAti(loc);
|
||||
op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
|
||||
opType = URX_TYPE(op);
|
||||
|
||||
// The loop is advancing linearly through the pattern.
|
||||
|
@ -3306,7 +3306,7 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) {
|
|||
case URX_STRING_I:
|
||||
{
|
||||
loc++;
|
||||
int32_t stringLenOp = fRXPat->fCompiledPat->elementAti(loc);
|
||||
int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
|
||||
currentLen += URX_VAL(stringLenOp);
|
||||
}
|
||||
break;
|
||||
|
@ -3346,7 +3346,7 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) {
|
|||
int32_t depth = 0;
|
||||
for (;;) {
|
||||
loc++;
|
||||
op = fRXPat->fCompiledPat->elementAti(loc);
|
||||
op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
|
||||
if (URX_TYPE(op) == URX_LA_START || URX_TYPE(op) == URX_LB_START) {
|
||||
depth++;
|
||||
}
|
||||
|
@ -3409,7 +3409,7 @@ void RegexCompile::stripNOPs() {
|
|||
int32_t d = 0;
|
||||
for (loc=0; loc<end; loc++) {
|
||||
deltas.addElement(d, *fStatus);
|
||||
int32_t op = fRXPat->fCompiledPat->elementAti(loc);
|
||||
int32_t op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
|
||||
if (URX_TYPE(op) == URX_NOP) {
|
||||
d++;
|
||||
}
|
||||
|
@ -3425,7 +3425,7 @@ void RegexCompile::stripNOPs() {
|
|||
int32_t src;
|
||||
int32_t dst = 0;
|
||||
for (src=0; src<end; src++) {
|
||||
int32_t op = fRXPat->fCompiledPat->elementAti(src);
|
||||
int32_t op = (int32_t)fRXPat->fCompiledPat->elementAti(src);
|
||||
int32_t opType = URX_TYPE(op);
|
||||
switch (opType) {
|
||||
case URX_NOP:
|
||||
|
@ -3468,7 +3468,7 @@ void RegexCompile::stripNOPs() {
|
|||
op = URX_BUILD(URX_STRING_I, URX_VAL(op)+stringDelta);
|
||||
|
||||
src++;
|
||||
int32_t lengthOp = fRXPat->fCompiledPat->elementAti(src);
|
||||
int32_t lengthOp = (int32_t)fRXPat->fCompiledPat->elementAti(src);
|
||||
|
||||
caseStringBuffer.setTo(fRXPat->fLiteralText, URX_VAL(op), URX_VAL(lengthOp));
|
||||
caseStringBuffer.foldCase(U_FOLD_CASE_DEFAULT);
|
||||
|
@ -3578,8 +3578,20 @@ void RegexCompile::stripNOPs() {
|
|||
void RegexCompile::error(UErrorCode e) {
|
||||
if (U_SUCCESS(*fStatus)) {
|
||||
*fStatus = e;
|
||||
fParseErr->line = fLineNum;
|
||||
fParseErr->offset = fCharNum;
|
||||
// Hmm. fParseErr (UParseError) line & offset fields are int32_t in public
|
||||
// API (see common/unicode/parseerr.h), while fLineNum and fCharNum are
|
||||
// int64_t. If the values of the latter are out of range for the former,
|
||||
// set them to the appropriate "field not supported" values.
|
||||
if (fLineNum > 0x7FFFFFFF) {
|
||||
fParseErr->line = 0;
|
||||
fParseErr->offset = -1;
|
||||
} else if (fCharNum > 0x7FFFFFFF) {
|
||||
fParseErr->line = (int32_t)fLineNum;
|
||||
fParseErr->offset = -1;
|
||||
} else {
|
||||
fParseErr->line = (int32_t)fLineNum;
|
||||
fParseErr->offset = (int32_t)fCharNum;
|
||||
}
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR; // throwaway status for extracting context
|
||||
|
||||
|
@ -3752,8 +3764,8 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
|
|||
c.fQuoted = TRUE;
|
||||
|
||||
if (UTEXT_FULL_TEXT_IN_CHUNK(fRXPat->fPattern, fPatternLength)) {
|
||||
int32_t endIndex = pos;
|
||||
c.fChar = u_unescapeAt(uregex_ucstr_unescape_charAt, &endIndex, fPatternLength, (void *)fRXPat->fPattern->chunkContents);
|
||||
int32_t endIndex = (int32_t)pos;
|
||||
c.fChar = u_unescapeAt(uregex_ucstr_unescape_charAt, &endIndex, (int32_t)fPatternLength, (void *)fRXPat->fPattern->chunkContents);
|
||||
|
||||
if (endIndex == pos) {
|
||||
error(U_REGEX_BAD_ESCAPE_SEQUENCE);
|
||||
|
|
|
@ -279,13 +279,18 @@ enum {
|
|||
// Match Engine State Stack Frame Layout.
|
||||
//
|
||||
struct REStackFrame {
|
||||
// Header
|
||||
int64_t fInputIdx; // Position of next character in the input string
|
||||
int32_t fPatIdx; // Position of next Op in the compiled pattern
|
||||
int32_t fExtra[2]; // Extra state, for capture group start/ends
|
||||
int64_t fPatIdx; // Position of next Op in the compiled pattern
|
||||
// (int64_t for UVector64, values fit in an int32_t)
|
||||
// Remainder
|
||||
int64_t fExtra[1]; // Extra state, for capture group start/ends
|
||||
// atomic parentheses, repeat counts, etc.
|
||||
// Locations assigned at pattern compile time.
|
||||
// Note that this will likely end up longer than 64 bits.
|
||||
// Variable-length array.
|
||||
};
|
||||
// number of UVector elements in the header
|
||||
#define RESTACKFRAME_HDRCOUNT 2
|
||||
|
||||
//
|
||||
// Start-Of-Match type. Used by find() to quickly scan to positions where a
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -17,6 +17,7 @@
|
|||
#include "uassert.h"
|
||||
#include "uvector.h"
|
||||
#include "uvectr32.h"
|
||||
#include "uvectr64.h"
|
||||
#include "regexcmp.h"
|
||||
#include "regeximp.h"
|
||||
#include "regexst.h"
|
||||
|
@ -161,7 +162,7 @@ void RegexPattern::init() {
|
|||
|
||||
fPattern = NULL; // will be set later
|
||||
fPatternString = NULL; // may be set later
|
||||
fCompiledPat = new UVector32(fDeferredStatus);
|
||||
fCompiledPat = new UVector64(fDeferredStatus);
|
||||
fGroupMap = new UVector32(fDeferredStatus);
|
||||
fSets = new UVector(fDeferredStatus);
|
||||
fInitialChars = new UnicodeSet;
|
||||
|
|
|
@ -62,6 +62,7 @@ class RegexMatcher;
|
|||
class RegexPattern;
|
||||
class UVector;
|
||||
class UVector32;
|
||||
class UVector64;
|
||||
class UnicodeSet;
|
||||
struct REStackFrame;
|
||||
struct Regex8BitSet;
|
||||
|
@ -575,7 +576,7 @@ private:
|
|||
UnicodeString *fPatternString; // The original pattern UnicodeString if relevant
|
||||
uint32_t fFlags; // The flags used when compiling the pattern.
|
||||
//
|
||||
UVector32 *fCompiledPat; // The compiled pattern p-code.
|
||||
UVector64 *fCompiledPat; // The compiled pattern p-code.
|
||||
UnicodeString fLiteralText; // Any literal string data from the pattern,
|
||||
// after un-escaping, for use during the match.
|
||||
|
||||
|
@ -1595,7 +1596,7 @@ private:
|
|||
UBool isWordBoundary(int64_t pos); // perform Perl-like \b test
|
||||
UBool isUWordBoundary(int64_t pos); // perform RBBI based \b test
|
||||
REStackFrame *resetStack();
|
||||
inline REStackFrame *StateSave(REStackFrame *fp, int32_t savePatIdx, UErrorCode &status);
|
||||
inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status);
|
||||
void IncrementTime(UErrorCode &status);
|
||||
|
||||
int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const;
|
||||
|
@ -1648,13 +1649,13 @@ private:
|
|||
UBool fRequireEnd; // True if the last match required end-of-input
|
||||
// (matched $ or Z)
|
||||
|
||||
UVector32 *fStack;
|
||||
UVector64 *fStack;
|
||||
REStackFrame *fFrame; // After finding a match, the last active stack frame,
|
||||
// which will contain the capture group results.
|
||||
// NOT valid while match engine is running.
|
||||
|
||||
int32_t *fData; // Data area for use by the compiled pattern.
|
||||
int32_t fSmallData[8]; // Use this for data if it's enough.
|
||||
int64_t *fData; // Data area for use by the compiled pattern.
|
||||
int64_t fSmallData[8]; // Use this for data if it's enough.
|
||||
|
||||
int32_t fTimeLimit; // Max time (in arbitrary steps) to let the
|
||||
// match engine run. Zero for unlimited.
|
||||
|
|
Loading…
Add table
Reference in a new issue