mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 14:31:31 +00:00
ICU-68 construct UnicodeString from invariant char*, alias from UChar*
X-SVN-Rev: 268
This commit is contained in:
parent
6f38a88ec1
commit
bbf2815b33
3 changed files with 160 additions and 46 deletions
|
@ -17,13 +17,15 @@
|
|||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include "unistr.h"
|
||||
|
||||
#include "utypes.h"
|
||||
#include "putil.h"
|
||||
#include "locid.h"
|
||||
#include "cstring.h"
|
||||
#include "cmemory.h"
|
||||
#include "ustring.h"
|
||||
#include "mutex.h"
|
||||
#include "unistr.h"
|
||||
|
||||
#if 0
|
||||
//DEBUGGING
|
||||
|
@ -152,6 +154,21 @@ UnicodeString::UnicodeString( const UChar *text,
|
|||
doReplace(0, 0, text, 0, textLength);
|
||||
}
|
||||
|
||||
// Aliasing constructor: the new string points directly at the caller's
// buffer `text` instead of copying it.
//
// isTerminated  whether `text` is NUL-terminated; needed if textLength is -1
// text          the UChar buffer to alias (stored as-is in fArray)
// textLength    number of UChars in `text`, or -1 to have the length
//               computed with u_strlen() (only when isTerminated is true)
//
// If the resulting length is negative (a negative textLength other than the
// "-1 with isTerminated" case), the string is marked bogus via setToBogus().
UnicodeString::UnicodeString(bool_t isTerminated,
|
||||
UChar *text,
|
||||
int32_t textLength)
|
||||
: fArray(text), // alias the caller's buffer; no copy is made
|
||||
fLength(textLength != -1 || !isTerminated ? textLength : u_strlen(text)), // parsed as (a || b) ? textLength : u_strlen(text)
|
||||
fCapacity(isTerminated ? fLength + 1 : fLength), // reads fLength; the capacity includes the NUL when terminated
|
||||
fRefCounted(FALSE), // the aliased buffer is not reference-counted
|
||||
fHashCode(kInvalidHashCode),
|
||||
fBogus(FALSE)
|
||||
{
|
||||
if(fLength < 0) { // no usable length was supplied
|
||||
setToBogus();
|
||||
}
|
||||
}
|
||||
|
||||
UnicodeString::UnicodeString(const char *codepageData,
|
||||
const char *codepage)
|
||||
: fArray(fStackBuffer),
|
||||
|
@ -621,10 +638,10 @@ UnicodeString::doReplace(UTextOffset start,
|
|||
// don't delete it until the end of the method. this can happen
|
||||
// in code like UnicodeString s = "foo"; s += s;
|
||||
if(srcChars != getArrayStart())
|
||||
delete [] fArray;
|
||||
delete [] fArray;
|
||||
else {
|
||||
deleteWhenDone = TRUE;
|
||||
bufferToDelete = fArray;
|
||||
deleteWhenDone = TRUE;
|
||||
bufferToDelete = fArray;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -771,15 +788,19 @@ UnicodeString::extract(UTextOffset start,
|
|||
int32_t arraySize = 0x0FFFFFFF;
|
||||
|
||||
// create the converter
|
||||
UConverter *converter = 0;
|
||||
UConverter *converter;
|
||||
|
||||
// if the codepage is the default, use our cache
|
||||
if(codepage == 0)
|
||||
if(codepage == 0) {
|
||||
converter = getDefaultConverter(status);
|
||||
else
|
||||
} else if(*codepage == 0) {
|
||||
converter = 0;
|
||||
} else {
|
||||
converter = ucnv_open(codepage, &status);
|
||||
}
|
||||
|
||||
// if we failed, set the appropriate flags and return
|
||||
// if it is an empty string, then use the "invariant character" conversion
|
||||
if(U_FAILURE(status)) {
|
||||
// close the converter
|
||||
if(codepage == 0)
|
||||
|
@ -789,14 +810,22 @@ UnicodeString::extract(UTextOffset start,
|
|||
return 0;
|
||||
}
|
||||
|
||||
// perform the conversion
|
||||
if(converter == 0) {
|
||||
// use the "invariant characters" conversion
|
||||
if(length > fLength - start) {
|
||||
length = fLength - start;
|
||||
}
|
||||
u_UCharsToChars(mySource, myTarget, length);
|
||||
return length;
|
||||
}
|
||||
|
||||
// there is no loop here since we assume the buffer is large enough
|
||||
myTargetLimit = myTarget + arraySize;
|
||||
|
||||
if(myTargetLimit < myTarget) /* ptr wrapped around: pin to U_MAX_PTR */
|
||||
myTargetLimit = (char*)U_MAX_PTR;
|
||||
|
||||
// perform the conversion
|
||||
// there is no loop here since we assume the buffer is large enough
|
||||
|
||||
ucnv_fromUnicode(converter, &myTarget, myTargetLimit,
|
||||
&mySource, mySourceEnd, NULL, TRUE, &status);
|
||||
|
||||
|
@ -822,7 +851,7 @@ UnicodeString::doCodepageCreate(const char *codepageData,
|
|||
int32_t sourceLen = dataLength;
|
||||
const char *mySource = codepageData;
|
||||
const char *mySourceEnd = mySource + sourceLen;
|
||||
UChar *myTarget = getArrayStart();
|
||||
UChar *myTarget;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int32_t arraySize = getCapacity();
|
||||
|
||||
|
@ -830,9 +859,12 @@ UnicodeString::doCodepageCreate(const char *codepageData,
|
|||
UConverter *converter = 0;
|
||||
|
||||
// if the codepage is the default, use our cache
|
||||
converter = (codepage == 0
|
||||
? getDefaultConverter(status)
|
||||
: ucnv_open(codepage, &status));
|
||||
// if it is an empty string, then use the "invariant character" conversion
|
||||
converter = (codepage == 0 ?
|
||||
getDefaultConverter(status) :
|
||||
*codepage == 0 ?
|
||||
0 :
|
||||
ucnv_open(codepage, &status));
|
||||
|
||||
// if we failed, set the appropriate flags and return
|
||||
if(U_FAILURE(status)) {
|
||||
|
@ -845,8 +877,37 @@ UnicodeString::doCodepageCreate(const char *codepageData,
|
|||
return;
|
||||
}
|
||||
|
||||
fHashCode = kInvalidHashCode;
|
||||
|
||||
// perform the conversion
|
||||
do {
|
||||
if(converter == 0) {
|
||||
// use the "invariant characters" conversion
|
||||
if(arraySize < dataLength) {
|
||||
int32_t tempCapacity;
|
||||
// allocate enough space for the dataLength, the refCount, and a NUL
|
||||
UChar *temp = allocate(dataLength + 2, tempCapacity);
|
||||
|
||||
if(temp == 0) {
|
||||
// set flags and return
|
||||
setToBogus();
|
||||
return;
|
||||
}
|
||||
|
||||
fArray = temp;
|
||||
fCapacity = tempCapacity;
|
||||
|
||||
setRefCount(1);
|
||||
|
||||
u_charsToUChars(codepageData, fArray + 1, dataLength);
|
||||
fArray[dataLength + 1] = 0;
|
||||
} else {
|
||||
u_charsToUChars(codepageData, getArrayStart(), dataLength);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
myTarget = getArrayStart();
|
||||
for(;;) {
|
||||
// reset the error code
|
||||
status = U_ZERO_ERROR;
|
||||
|
||||
|
@ -859,30 +920,24 @@ UnicodeString::doCodepageCreate(const char *codepageData,
|
|||
arraySize = getCapacity() - fLength;
|
||||
|
||||
// allocate more space and copy data, if needed
|
||||
if(fLength < dataLength) {
|
||||
if(status == U_INDEX_OUTOFBOUNDS_ERROR) {
|
||||
int32_t tempCapacity;
|
||||
UChar *temp = allocate(fCapacity, tempCapacity);
|
||||
|
||||
if(! temp) {
|
||||
// close the converter
|
||||
if(codepage == 0)
|
||||
releaseDefaultConverter(converter);
|
||||
else
|
||||
ucnv_close(converter);
|
||||
// set flags and return
|
||||
setToBogus();
|
||||
return;
|
||||
// set flags and return
|
||||
setToBogus();
|
||||
break;
|
||||
}
|
||||
|
||||
// if we're not currently ref counted, shift the array right by one
|
||||
if(fRefCounted == FALSE)
|
||||
us_arrayCopy(fArray, 0, temp, 1, fLength);
|
||||
// otherwise, copy the old array into temp, including the ref count
|
||||
else
|
||||
us_arrayCopy(fArray, 0, temp, 0, fLength + 1);
|
||||
|
||||
if(fRefCounted && removeRef() == 0)
|
||||
delete [] fArray;
|
||||
if(fRefCounted) {
|
||||
// copy the old array into temp, including the ref count
|
||||
us_arrayCopy(fArray, 0, temp, 0, fLength + 1);
|
||||
delete [] fArray;
|
||||
} else {
|
||||
// if we're not currently ref counted, shift the array right by one
|
||||
us_arrayCopy(fArray, 0, temp, 1, fLength);
|
||||
}
|
||||
|
||||
fArray = temp;
|
||||
fCapacity = tempCapacity;
|
||||
|
@ -891,11 +946,10 @@ UnicodeString::doCodepageCreate(const char *codepageData,
|
|||
|
||||
myTarget = getArrayStart() + fLength;
|
||||
arraySize = getCapacity() - fLength;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
while(status == U_INDEX_OUTOFBOUNDS_ERROR);
|
||||
|
||||
fHashCode = kInvalidHashCode;
|
||||
|
||||
// close the converter
|
||||
if(codepage == 0)
|
||||
|
@ -925,9 +979,6 @@ UnicodeString::getUChars() const
|
|||
if(fBogus)
|
||||
return 0;
|
||||
|
||||
// clone our array, if necessary
|
||||
((UnicodeString*)this)->cloneArrayIfNeeded();
|
||||
|
||||
// no room for null, resize
|
||||
if(getCapacity() <= fLength) {
|
||||
// allocate at minimum the current capacity + needed space
|
||||
|
@ -955,8 +1006,10 @@ UnicodeString::getUChars() const
|
|||
((UnicodeString*)this)->setRefCount(1);
|
||||
}
|
||||
|
||||
// tack on a trailing null
|
||||
fArray[(fRefCounted ? 1 : 0) + fLength] = 0;
|
||||
if(getArrayStart()[fLength] != 0) {
|
||||
// tack on a trailing null
|
||||
((UChar *)getArrayStart())[fLength] = 0;
|
||||
}
|
||||
|
||||
return getArrayStart();
|
||||
}
|
||||
|
|
|
@ -40,6 +40,30 @@
|
|||
class Locale;
|
||||
class UCharReference;
|
||||
|
||||
/**
|
||||
* Unicode String literals in C++.
|
||||
* Depending on the platform properties, different UnicodeString
|
||||
* constructors should be used to create a UnicodeString object from
|
||||
* a string literal.
|
||||
* The macros are defined for maximum performance.
|
||||
* They work only for strings that contain "invariant characters", i.e.,
|
||||
* only latin letters, digits, and some punctuation.
|
||||
* See utypes.h for details.
|
||||
*
|
||||
* The string parameter must be a C string literal.
|
||||
* The length of the string, not including the terminating
|
||||
* <code>NUL</code>, must be specified as a constant.
|
||||
* The U_STRING_DECL macro should be invoked exactly once for one
|
||||
* such string variable before it is used.
|
||||
*/
|
||||
#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
/* wchar_t and UChar have the same size on an ASCII-family platform:
   paste L onto the literal and pass the wide literal, cast to UChar *,
   with TRUE as the first argument (presumably selecting the aliasing
   constructor, with TRUE meaning NUL-terminated). */
|
||||
# define UNICODE_STRING(cs, length) UnicodeString(TRUE, (UChar *)L ## cs, length)
|
||||
#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
/* UChar is one byte wide on an ASCII-family platform:
   pass the narrow literal itself, cast to UChar *. */
|
||||
# define UNICODE_STRING(cs, length) UnicodeString(TRUE, (UChar *)cs, length)
|
||||
#else
/* Otherwise: pass the char literal, its length and an empty codepage
   string "" (documented on the char* constructors as selecting the
   "invariant characters" conversion). */
|
||||
# define UNICODE_STRING(cs, length) UnicodeString(cs, length, "")
|
||||
#endif
|
||||
|
||||
/**
|
||||
* UnicodeString is a concrete implementation of the abstract class
|
||||
* UnicodeText. UnicodeString performs codeset conversion from char*
|
||||
|
@ -678,6 +702,9 @@ public:
|
|||
* @param target the target buffer for extraction
|
||||
* @param codepage the desired codepage for the characters. 0 has
|
||||
* the special meaning of the default codepage
|
||||
* If <code>codepage</code> is an empty string (<code>""</code>),
|
||||
* then a simple conversion is performed on the codepage-invariant
|
||||
* subset ("invariant characters") of the platform encoding. See utypes.h.
|
||||
* @return the number of characters written to <TT>target</TT>
|
||||
*/
|
||||
int32_t extract(UTextOffset start,
|
||||
|
@ -1302,12 +1329,34 @@ public:
|
|||
UnicodeString(const UChar *text,
|
||||
int32_t textLength);
|
||||
|
||||
/**
|
||||
* Aliasing UChar* constructor.
|
||||
* The text will be used for the new UnicodeString object, but
|
||||
* it will not be released when the UnicodeString is destroyed.
|
||||
* Be careful not to attempt to modify the contents of the UnicodeString
|
||||
* if the text is read-only. Operations that allocate an entirely
|
||||
* new buffer are harmless.
|
||||
*
|
||||
* @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
|
||||
* This must be true if <code>textLength==-1</code>.
|
||||
* @param text The characters to alias for the UnicodeString.
|
||||
* @param textLength The number of Unicode characters in <code>text</code> to alias.
|
||||
* If -1, then this constructor will determine the length
|
||||
* by calling <code>u_strlen()</code>.
|
||||
*/
|
||||
UnicodeString(bool_t isTerminated,
|
||||
UChar *text,
|
||||
int32_t textLength);
|
||||
|
||||
/**
|
||||
* char* constructor.
|
||||
* @param codepageData an array of bytes, null-terminated
|
||||
* @param codepage the encoding of <TT>codepageData</TT>. The special
|
||||
* value 0 for <TT>codepage</TT> indicates that the text is in the
|
||||
* platform's default codepage.
|
||||
* If <code>codepage</code> is an empty string (<code>""</code>),
|
||||
* then a simple conversion is performed on the codepage-invariant
|
||||
* subset ("invariant characters") of the platform encoding. See utypes.h.
|
||||
*/
|
||||
UnicodeString(const char *codepageData,
|
||||
const char *codepage = 0);
|
||||
|
@ -1319,6 +1368,9 @@ public:
|
|||
* @param codepage the encoding of <TT>codepageData</TT>. The special
|
||||
* value 0 for <TT>codepage</TT> indicates that the text is in the
|
||||
* platform's default codepage.
|
||||
* If <code>codepage</code> is an empty string (<code>""</code>),
|
||||
* then a simple conversion is performed on the codepage-invariant
|
||||
* subset ("invariant characters") of the platform encoding. See utypes.h.
|
||||
*/
|
||||
UnicodeString(const char *codepageData,
|
||||
int32_t dataLength,
|
||||
|
@ -1454,7 +1506,16 @@ private:
|
|||
void pinIndices(UTextOffset& start,
|
||||
int32_t& length) const;
|
||||
|
||||
// Real ctor for converting from codepage data
|
||||
/*
|
||||
* Real constructor for converting from codepage data.
|
||||
* It assumes that it is called with !fRefCounted.
|
||||
*
|
||||
* If <code>codepage==0</code>, then the default converter
|
||||
* is used for the platform encoding.
|
||||
* If <code>codepage</code> is an empty string (<code>""</code>),
|
||||
* then a simple conversion is performed on the codepage-invariant
|
||||
* subset ("invariant characters") of the platform encoding. See utypes.h.
|
||||
*/
|
||||
void doCodepageCreate(const char *codepageData,
|
||||
int32_t dataLength,
|
||||
const char *codepage);
|
||||
|
@ -1472,8 +1533,8 @@ private:
|
|||
UChar *fArray; // the Unicode data
|
||||
int32_t fLength; // number of characters in fArray
|
||||
int32_t fCapacity; // sizeof fArray
|
||||
bool_t fRefCounted; // indicates if we own storage
|
||||
int32_t fHashCode; // the hash code
|
||||
bool_t fRefCounted; // indicates if we own storage
|
||||
bool_t fBogus; // indicates if an operation failed
|
||||
|
||||
// constants
|
||||
|
|
|
@ -174,7 +174,7 @@ U_CAPI char* U_EXPORT2 u_austrcpy(char *s1,
|
|||
* parameters.
|
||||
* The string parameter must be a C string literal.
|
||||
* The length of the string, not including the terminating
|
||||
* <code>NUL</code> must be specified as a constant.
|
||||
* <code>NUL</code>, must be specified as a constant.
|
||||
* The U_STRING_DECL macro should be invoked exactly once for one
|
||||
* such string variable before it is used.
|
||||
*
|
||||
|
@ -195,10 +195,10 @@ U_CAPI char* U_EXPORT2 u_austrcpy(char *s1,
|
|||
* </pre>
|
||||
*/
|
||||
#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
|
||||
# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]={ (UChar *)L ## cs }
|
||||
# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]={ (const UChar *)L ## cs }
|
||||
# define U_STRING_INIT(var, cs, length)
|
||||
#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
|
||||
# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]={ (UChar *)cs }
|
||||
# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]={ (const UChar *)cs }
|
||||
# define U_STRING_INIT(var, cs, length)
|
||||
#else
|
||||
# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]
|
||||
|
|
Loading…
Add table
Reference in a new issue