ICU-68 construct UnicodeString from invariant char*, alias from UChar*

X-SVN-Rev: 268
This commit is contained in:
Markus Scherer 1999-12-01 17:51:53 +00:00
parent 6f38a88ec1
commit bbf2815b33
3 changed files with 160 additions and 46 deletions

View file

@ -17,13 +17,15 @@
*******************************************************************************
*/
#include "unistr.h"
#include "utypes.h"
#include "putil.h"
#include "locid.h"
#include "cstring.h"
#include "cmemory.h"
#include "ustring.h"
#include "mutex.h"
#include "unistr.h"
#if 0
//DEBUGGING
@ -152,6 +154,21 @@ UnicodeString::UnicodeString( const UChar *text,
doReplace(0, 0, text, 0, textLength);
}
/*
 * Aliasing constructor: the new string uses the caller's buffer directly.
 * The buffer is not reference counted by this object (fRefCounted is FALSE).
 *
 * isTerminated  TRUE if text is NUL-terminated; must be TRUE when
 *               textLength is -1.
 * text          the buffer to alias; a null pointer yields a bogus string.
 * textLength    number of UChars in text, or -1 to have the length
 *               determined with u_strlen().
 *
 * The string is set to bogus if text is null or the resulting length is
 * negative (which includes textLength == -1 with isTerminated == FALSE).
 */
UnicodeString::UnicodeString(bool_t isTerminated,
                             UChar *text,
                             int32_t textLength)
  : fArray(text),
    // Only the combination (isTerminated, -1) measures the text; guard the
    // u_strlen() call so that a null pointer produces a negative length
    // (and therefore a bogus string) instead of a null dereference.
    fLength(textLength != -1 || !isTerminated
              ? textLength
              : (text != 0 ? u_strlen(text) : -1)),
    // Relies on fLength being initialized before fCapacity; a terminated
    // buffer has one extra UChar (the NUL) available beyond its length.
    fCapacity(isTerminated ? fLength + 1 : fLength),
    fRefCounted(FALSE),
    fHashCode(kInvalidHashCode),
    fBogus(FALSE)
{
  // A null buffer cannot be aliased, whatever length was supplied.
  if(text == 0 || fLength < 0) {
    setToBogus();
  }
}
UnicodeString::UnicodeString(const char *codepageData,
const char *codepage)
: fArray(fStackBuffer),
@ -621,10 +638,10 @@ UnicodeString::doReplace(UTextOffset start,
// don't delete it until the end of the method. this can happen
// in code like UnicodeString s = "foo"; s += s;
if(srcChars != getArrayStart())
delete [] fArray;
delete [] fArray;
else {
deleteWhenDone = TRUE;
bufferToDelete = fArray;
deleteWhenDone = TRUE;
bufferToDelete = fArray;
}
}
@ -771,15 +788,19 @@ UnicodeString::extract(UTextOffset start,
int32_t arraySize = 0x0FFFFFFF;
// create the converter
UConverter *converter = 0;
UConverter *converter;
// if the codepage is the default, use our cache
if(codepage == 0)
if(codepage == 0) {
converter = getDefaultConverter(status);
else
} else if(*codepage == 0) {
converter = 0;
} else {
converter = ucnv_open(codepage, &status);
}
// if we failed, set the appropriate flags and return
// if it is an empty string, then use the "invariant character" conversion
if(U_FAILURE(status)) {
// close the converter
if(codepage == 0)
@ -789,14 +810,22 @@ UnicodeString::extract(UTextOffset start,
return 0;
}
// perform the conversion
if(converter == 0) {
// use the "invariant characters" conversion
if(length > fLength - start) {
length = fLength - start;
}
u_UCharsToChars(mySource, myTarget, length);
return length;
}
// there is no loop here since we assume the buffer is large enough
myTargetLimit = myTarget + arraySize;
if(myTargetLimit < myTarget) /* ptr wrapped around: pin to U_MAX_PTR */
myTargetLimit = (char*)U_MAX_PTR;
// perform the conversion
// there is no loop here since we assume the buffer is large enough
ucnv_fromUnicode(converter, &myTarget, myTargetLimit,
&mySource, mySourceEnd, NULL, TRUE, &status);
@ -822,7 +851,7 @@ UnicodeString::doCodepageCreate(const char *codepageData,
int32_t sourceLen = dataLength;
const char *mySource = codepageData;
const char *mySourceEnd = mySource + sourceLen;
UChar *myTarget = getArrayStart();
UChar *myTarget;
UErrorCode status = U_ZERO_ERROR;
int32_t arraySize = getCapacity();
@ -830,9 +859,12 @@ UnicodeString::doCodepageCreate(const char *codepageData,
UConverter *converter = 0;
// if the codepage is the default, use our cache
converter = (codepage == 0
? getDefaultConverter(status)
: ucnv_open(codepage, &status));
// if it is an empty string, then use the "invariant character" conversion
converter = (codepage == 0 ?
getDefaultConverter(status) :
*codepage == 0 ?
0 :
ucnv_open(codepage, &status));
// if we failed, set the appropriate flags and return
if(U_FAILURE(status)) {
@ -845,8 +877,37 @@ UnicodeString::doCodepageCreate(const char *codepageData,
return;
}
fHashCode = kInvalidHashCode;
// perform the conversion
do {
if(converter == 0) {
// use the "invariant characters" conversion
if(arraySize < dataLength) {
int32_t tempCapacity;
// allocate enough space for the dataLength, the refCount, and a NUL
UChar *temp = allocate(dataLength + 2, tempCapacity);
if(temp == 0) {
// set flags and return
setToBogus();
return;
}
fArray = temp;
fCapacity = tempCapacity;
setRefCount(1);
u_charsToUChars(codepageData, fArray + 1, dataLength);
fArray[dataLength + 1] = 0;
} else {
u_charsToUChars(codepageData, getArrayStart(), dataLength);
}
return;
}
myTarget = getArrayStart();
for(;;) {
// reset the error code
status = U_ZERO_ERROR;
@ -859,30 +920,24 @@ UnicodeString::doCodepageCreate(const char *codepageData,
arraySize = getCapacity() - fLength;
// allocate more space and copy data, if needed
if(fLength < dataLength) {
if(status == U_INDEX_OUTOFBOUNDS_ERROR) {
int32_t tempCapacity;
UChar *temp = allocate(fCapacity, tempCapacity);
if(! temp) {
// close the converter
if(codepage == 0)
releaseDefaultConverter(converter);
else
ucnv_close(converter);
// set flags and return
setToBogus();
return;
// set flags and return
setToBogus();
break;
}
// if we're not currently ref counted, shift the array right by one
if(fRefCounted == FALSE)
us_arrayCopy(fArray, 0, temp, 1, fLength);
// otherwise, copy the old array into temp, including the ref count
else
us_arrayCopy(fArray, 0, temp, 0, fLength + 1);
if(fRefCounted && removeRef() == 0)
delete [] fArray;
if(fRefCounted) {
// copy the old array into temp, including the ref count
us_arrayCopy(fArray, 0, temp, 0, fLength + 1);
delete [] fArray;
} else {
// if we're not currently ref counted, shift the array right by one
us_arrayCopy(fArray, 0, temp, 1, fLength);
}
fArray = temp;
fCapacity = tempCapacity;
@ -891,11 +946,10 @@ UnicodeString::doCodepageCreate(const char *codepageData,
myTarget = getArrayStart() + fLength;
arraySize = getCapacity() - fLength;
} else {
break;
}
}
while(status == U_INDEX_OUTOFBOUNDS_ERROR);
fHashCode = kInvalidHashCode;
// close the converter
if(codepage == 0)
@ -925,9 +979,6 @@ UnicodeString::getUChars() const
if(fBogus)
return 0;
// clone our array, if necessary
((UnicodeString*)this)->cloneArrayIfNeeded();
// no room for null, resize
if(getCapacity() <= fLength) {
// allocate at minimum the current capacity + needed space
@ -955,8 +1006,10 @@ UnicodeString::getUChars() const
((UnicodeString*)this)->setRefCount(1);
}
// tack on a trailing null
fArray[(fRefCounted ? 1 : 0) + fLength] = 0;
if(getArrayStart()[fLength] != 0) {
// tack on a trailing null
((UChar *)getArrayStart())[fLength] = 0;
}
return getArrayStart();
}

View file

@ -40,6 +40,30 @@
class Locale;
class UCharReference;
/**
* Unicode String literals in C++.
* Depending on the platform properties, different UnicodeString
* constructors should be used to create a UnicodeString object from
* a string literal.
* The macros are defined for maximum performance.
* They work only for strings that contain "invariant characters", i.e.,
* only latin letters, digits, and some punctuation.
* See utypes.h for details.
*
* The string parameter must be a C string literal.
* The length of the string, not including the terminating
* <code>NUL</code>, must be specified as a constant.
* The U_STRING_DECL macro should be invoked exactly once for one
* such string variable before it is used.
*/
// Case 1: wchar_t has the same size as UChar and the charset family is ASCII.
// The wide literal L"..." is passed to the (isTerminated, text, textLength)
// constructor, which aliases the buffer it is given.
#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
# define UNICODE_STRING(cs, length) UnicodeString(TRUE, (UChar *)L ## cs, length)
// Case 2: UChar is one byte wide and the charset family is ASCII.
// The plain char literal is passed to the same aliasing constructor.
#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
# define UNICODE_STRING(cs, length) UnicodeString(TRUE, (UChar *)cs, length)
// Otherwise: use the char* constructor with the empty codepage name "",
// which selects the "invariant characters" conversion.
#else
# define UNICODE_STRING(cs, length) UnicodeString(cs, length, "")
#endif
/**
* UnicodeString is a concrete implementation of the abstract class
* UnicodeText. UnicodeString performs codeset conversion from char*
@ -678,6 +702,9 @@ public:
* @param target the target buffer for extraction
* @param codepage the desired codepage for the characters. 0 has
* the special meaning of the default codepage
* If <code>codepage</code> is an empty string (<code>""</code>),
* then a simple conversion is performed on the codepage-invariant
* subset ("invariant characters") of the platform encoding. See utypes.h.
* @return the number of characters written to <TT>dst</TT>
*/
int32_t extract(UTextOffset start,
@ -1302,12 +1329,34 @@ public:
UnicodeString(const UChar *text,
int32_t textLength);
/**
* Aliasing UChar* constructor.
* The text will be used for the new UnicodeString object, but
* it will not be released when the UnicodeString is destroyed.
* Be careful not to attempt to modify the contents of the UnicodeString
* if the text is read-only. Operations that allocate an entirely
* new buffer are harmless.
*
* @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
* This must be true if <code>textLength==-1</code>.
* @param text The characters to alias for the UnicodeString.
* @param textLength The number of Unicode characters in <code>text</code> to alias.
* If -1, then this constructor will determine the length
* by calling <code>u_strlen()</code>.
*/
UnicodeString(bool_t isTerminated,
UChar *text,
int32_t textLength);
/**
* char* constructor.
* @param codepageData an array of bytes, null-terminated
* @param codepage the encoding of <TT>codepageData</TT>. The special
* value 0 for <TT>codepage</TT> indicates that the text is in the
* platform's default codepage.
* If <code>codepage</code> is an empty string (<code>""</code>),
* then a simple conversion is performed on the codepage-invariant
* subset ("invariant characters") of the platform encoding. See utypes.h.
*/
UnicodeString(const char *codepageData,
const char *codepage = 0);
@ -1319,6 +1368,9 @@ public:
* @param codepage the encoding of <TT>codepageData</TT>. The special
* value 0 for <TT>codepage</TT> indicates that the text is in the
* platform's default codepage.
* If <code>codepage</code> is an empty string (<code>""</code>),
* then a simple conversion is performed on the codepage-invariant
* subset ("invariant characters") of the platform encoding. See utypes.h.
*/
UnicodeString(const char *codepageData,
int32_t dataLength,
@ -1454,7 +1506,16 @@ private:
void pinIndices(UTextOffset& start,
int32_t& length) const;
// Real ctor for converting from codepage data
/*
* Real constructor for converting from codepage data.
* It assumes that it is called with !fRefCounted.
*
* If <code>codepage==0</code>, then the default converter
* is used for the platform encoding.
* If <code>codepage</code> is an empty string (<code>""</code>),
* then a simple conversion is performed on the codepage-invariant
* subset ("invariant characters") of the platform encoding. See utypes.h.
*/
void doCodepageCreate(const char *codepageData,
int32_t dataLength,
const char *codepage);
@ -1472,8 +1533,8 @@ private:
UChar *fArray; // the Unicode data
int32_t fLength; // number characters in fArray
int32_t fCapacity; // sizeof fArray
bool_t fRefCounted; // indicates if we own storage
int32_t fHashCode; // the hash code
bool_t fRefCounted; // indicates if we own storage
bool_t fBogus; // indicates if an operation failed
// constants

View file

@ -174,7 +174,7 @@ U_CAPI char* U_EXPORT2 u_austrcpy(char *s1,
* parameters.
* The string parameter must be a C string literal.
* The length of the string, not including the terminating
* <code>NUL</code> must be specified as a constant.
* <code>NUL</code>, must be specified as a constant.
* The U_STRING_DECL macro should be invoked exactly once for one
* such string variable before it is used.
*
@ -195,10 +195,10 @@ U_CAPI char* U_EXPORT2 u_austrcpy(char *s1,
* </pre>
*/
#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]={ (UChar *)L ## cs }
# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]={ (const UChar *)L ## cs }
# define U_STRING_INIT(var, cs, length)
#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]={ (UChar *)cs }
# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]={ (const UChar *)cs }
# define U_STRING_INIT(var, cs, length)
#else
# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]