mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 00:43:32 +00:00
ICU-11551 new UNISTR_OBJECT_SIZE=64 for 27 UChars stored internally on 64-bit machine
X-SVN-Rev: 37339
This commit is contained in:
parent
7daa0c8e78
commit
1d4b6a6ec7
5 changed files with 134 additions and 28 deletions
icu4c/source
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1998-2014, International Business Machines
|
||||
* Copyright (C) 1998-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
|
@ -173,15 +173,52 @@ class UnicodeStringAppendable; // unicode/appendable.h
|
|||
# endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UNISTR_OBJECT_SIZE
|
||||
* Desired sizeof(UnicodeString) in bytes.
|
||||
* It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
|
||||
* It is also beneficial for the object size to be a multiple of 16 bytes,
|
||||
* which is a common granularity for heap allocation.
|
||||
*
|
||||
* Any space inside the object beyond sizeof(vtable pointer) + 2
|
||||
* is available for storing short strings inside the object.
|
||||
* The bigger the object, the longer a string that can be stored inside the object,
|
||||
* without additional heap allocation.
|
||||
*
|
||||
* Depending on a platform's pointer size, pointer alignment requirements,
|
||||
* and struct padding, the compiler will usually round up sizeof(UnicodeString)
|
||||
* to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
|
||||
* to hold the fields for heap-allocated strings.
|
||||
* Such a minimum size also ensures that the object is easily large enough
|
||||
* to hold at least 2 UChars, for one supplementary code point (U16_MAX_LENGTH).
|
||||
*
|
||||
* sizeof(UnicodeString) >= 48 should work for all known platforms.
|
||||
*
|
||||
* For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
|
||||
* sizeof(UnicodeString) = 64 would leave space for
|
||||
* (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
|
||||
* UChars stored inside the object.
|
||||
*
|
||||
* The minimum object size on a 64-bit machine would be
|
||||
* 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
|
||||
* and the internal buffer would hold up to 11 UChars in that case.
|
||||
*
|
||||
* @see U16_MAX_LENGTH
|
||||
* @draft ICU 56
|
||||
*/
|
||||
#ifndef UNISTR_OBJECT_SIZE
|
||||
# define UNISTR_OBJECT_SIZE 64
|
||||
#endif
|
||||
|
||||
/**
|
||||
* UnicodeString is a string class that stores Unicode characters directly and provides
|
||||
* similar functionality as the Java String and StringBuffer classes.
|
||||
* similar functionality as the Java String and StringBuffer/StringBuilder classes.
|
||||
* It is a concrete implementation of the abstract class Replaceable (for transliteration).
|
||||
*
|
||||
* The UnicodeString class is not suitable for subclassing.
|
||||
*
|
||||
* <p>For an overview of Unicode strings in C and C++ see the
|
||||
* <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
|
||||
* <a href="http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-">User Guide Strings chapter</a>.</p>
|
||||
*
|
||||
* <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
|
||||
* A Unicode character may be stored with either one code unit
|
||||
|
@ -3474,9 +3511,12 @@ private:
|
|||
|
||||
// constants
|
||||
enum {
|
||||
// Set the stack buffer size so that sizeof(UnicodeString) is,
|
||||
// naturally (without padding), a multiple of sizeof(pointer).
|
||||
US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
|
||||
/**
|
||||
* Size of stack buffer for short strings.
|
||||
* Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
|
||||
* @see UNISTR_OBJECT_SIZE
|
||||
*/
|
||||
US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR,
|
||||
kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
|
||||
kGrowSize=128, // grow size for this buffer
|
||||
kInvalidHashCode=0, // invalid hash code
|
||||
|
@ -3544,9 +3584,10 @@ private:
|
|||
* (Padding at the end of fFields is ok:
|
||||
* As long as it is no larger than fStackFields, it is not wasted space.)
|
||||
*
|
||||
* For some of the history of the UnicodeString class fields layout,
|
||||
* see ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
|
||||
* and ticket #8322 "why is sizeof(UnicodeString)==48?".
|
||||
* For some of the history of the UnicodeString class fields layout, see
|
||||
* - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
|
||||
* - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
|
||||
* - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
|
||||
*/
|
||||
// (implicit) *vtable;
|
||||
union StackBufferOrFields {
|
||||
|
@ -3558,9 +3599,11 @@ private:
|
|||
} fStackFields;
|
||||
struct {
|
||||
int16_t fLengthAndFlags; // bit fields: see constants above
|
||||
UChar *fArray; // the Unicode data
|
||||
int32_t fCapacity; // capacity of fArray (in UChars)
|
||||
int32_t fLength; // number of characters in fArray if >kMaxShortLength; else undefined
|
||||
int32_t fCapacity; // capacity of fArray (in UChars)
|
||||
// array pointer last to minimize padding for machines with P128 data model
|
||||
// or pointer sizes that are not a power of 2
|
||||
UChar *fArray; // the Unicode data
|
||||
} fFields;
|
||||
} fUnion;
|
||||
};
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 1999-2014, International Business Machines Corporation and
|
||||
* Copyright (C) 1999-2015, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
******************************************************************************
|
||||
*
|
||||
|
@ -367,8 +367,40 @@ UnicodeString::allocate(int32_t capacity) {
|
|||
//========================================
|
||||
// Destructor
|
||||
//========================================
|
||||
|
||||
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
|
||||
static u_atomic_int32_t finalLengthCounts[0x400]; // UnicodeString::kMaxShortLength+1
|
||||
static u_atomic_int32_t beyondCount(0);
|
||||
|
||||
U_CAPI void unistr_printLengths() {
|
||||
int32_t i;
|
||||
for(i = 0; i <= 59; ++i) {
|
||||
printf("%2d, %9d\n", i, (int32_t)finalLengthCounts[i]);
|
||||
}
|
||||
int32_t beyond = beyondCount;
|
||||
for(; i < UPRV_LENGTHOF(finalLengthCounts); ++i) {
|
||||
beyond += finalLengthCounts[i];
|
||||
}
|
||||
printf(">59, %9d\n", beyond);
|
||||
}
|
||||
#endif
|
||||
|
||||
UnicodeString::~UnicodeString()
|
||||
{
|
||||
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
|
||||
// Count lengths of strings at the end of their lifetime.
|
||||
// Useful for discussion of a desirable stack buffer size.
|
||||
// Count the contents length, not the optional NUL terminator nor further capacity.
|
||||
// Ignore open-buffer strings and strings which alias external storage.
|
||||
if((fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kReadonlyAlias|kWritableAlias)) == 0) {
|
||||
if(hasShortLength()) {
|
||||
umtx_atomic_inc(finalLengthCounts + getShortLength());
|
||||
} else {
|
||||
umtx_atomic_inc(&beyondCount);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
releaseArray();
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2013, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2015, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/********************************************************************************
|
||||
|
@ -73,6 +73,10 @@ void ctest_setICU_DATA(void);
|
|||
static int gOrigArgc;
|
||||
static const char* const * gOrigArgv;
|
||||
|
||||
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
|
||||
U_CAPI void unistr_printLengths();
|
||||
#endif
|
||||
|
||||
int main(int argc, const char* const argv[])
|
||||
{
|
||||
int nerrors = 0;
|
||||
|
@ -233,6 +237,10 @@ int main(int argc, const char* const argv[])
|
|||
|
||||
} /* End of loop that repeats the entire test, if requested. (Normally doesn't loop) */
|
||||
|
||||
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
|
||||
unistr_printLengths();
|
||||
#endif
|
||||
|
||||
endTime = uprv_getRawUTCtime();
|
||||
diffTime = (int32_t)(endTime - startTime);
|
||||
printf("Elapsed Time: %02d:%02d:%02d.%03d\n",
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2014, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2015, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
|
@ -1166,6 +1166,10 @@ IntlTest::run_phase2( char* name, char* par ) // supports reporting memory leaks
|
|||
# define TRY_CNV_2 "sjis"
|
||||
#endif
|
||||
|
||||
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
|
||||
U_CAPI void unistr_printLengths();
|
||||
#endif
|
||||
|
||||
int
|
||||
main(int argc, char* argv[])
|
||||
{
|
||||
|
@ -1524,6 +1528,10 @@ main(int argc, char* argv[])
|
|||
u_cleanup();
|
||||
}
|
||||
|
||||
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
|
||||
unistr_printLengths();
|
||||
#endif
|
||||
|
||||
fprintf(stdout, "--------------------------------------\n");
|
||||
|
||||
if (execCount <= 0) {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2014, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2015, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
|
@ -2098,20 +2098,35 @@ UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
|
|||
void
|
||||
UnicodeStringTest::TestSizeofUnicodeString() {
|
||||
// See the comments in unistr.h near the declaration of UnicodeString's fields.
|
||||
// See the API comments for UNISTR_OBJECT_SIZE.
|
||||
size_t sizeofUniStr=sizeof(UnicodeString);
|
||||
size_t expected;
|
||||
switch(sizeof(void *)) {
|
||||
case 4:
|
||||
expected=32;
|
||||
break;
|
||||
case 8:
|
||||
expected=40;
|
||||
break;
|
||||
default:
|
||||
logln("This platform has neither 32-bit nor 64-bit pointers.");
|
||||
return;
|
||||
}
|
||||
size_t expected=UNISTR_OBJECT_SIZE;
|
||||
if(expected!=sizeofUniStr) {
|
||||
errln("sizeof(UnicodeString)=%d, expected %d", (int)sizeofUniStr, (int)expected);
|
||||
// Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
|
||||
// or the compiler might add more internal padding than expected.
|
||||
errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
|
||||
(int)sizeofUniStr, (int)expected);
|
||||
}
|
||||
if(sizeofUniStr<32) {
|
||||
errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
|
||||
}
|
||||
// We assume that the entire UnicodeString object,
|
||||
// minus the vtable pointer and 2 bytes for flags and short length,
|
||||
// is available for internal storage of UChars.
|
||||
int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR;
|
||||
UnicodeString s;
|
||||
const UChar *emptyBuffer=s.getBuffer();
|
||||
for(int32_t i=0; i<expectedStackBufferLength; ++i) {
|
||||
s.append((UChar)0x2e);
|
||||
}
|
||||
const UChar *fullBuffer=s.getBuffer();
|
||||
if(fullBuffer!=emptyBuffer) {
|
||||
errln("unexpected reallocation when filling with assumed stack buffer size of %d",
|
||||
expectedStackBufferLength);
|
||||
}
|
||||
const UChar *terminatedBuffer=s.getTerminatedBuffer();
|
||||
if(terminatedBuffer==emptyBuffer) {
|
||||
errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
|
||||
expectedStackBufferLength);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue