ICU-11551 new UNISTR_OBJECT_SIZE=64 for 27 UChars stored internally on 64-bit machine

X-SVN-Rev: 37339
This commit is contained in:
Markus Scherer 2015-04-15 18:49:55 +00:00
parent 7daa0c8e78
commit 1d4b6a6ec7
5 changed files with 134 additions and 28 deletions
icu4c/source

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 1998-2014, International Business Machines
* Copyright (C) 1998-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@ -173,15 +173,52 @@ class UnicodeStringAppendable; // unicode/appendable.h
# endif
#endif
/**
* \def UNISTR_OBJECT_SIZE
* Desired sizeof(UnicodeString) in bytes.
* It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
* The object size may want to be a multiple of 16 bytes,
* which is a common granularity for heap allocation.
*
* Any space inside the object beyond sizeof(vtable pointer) + 2
* is available for storing short strings inside the object.
* The bigger the object, the longer a string that can be stored inside the object,
* without additional heap allocation.
*
* Depending on a platform's pointer size, pointer alignment requirements,
* and struct padding, the compiler will usually round up sizeof(UnicodeString)
* to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
* to hold the fields for heap-allocated strings.
* Such a minimum size also ensures that the object is easily large enough
* to hold at least 2 UChars, for one supplementary code point (U16_MAX_LENGTH).
*
* sizeof(UnicodeString) >= 48 should work for all known platforms.
*
* For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
* sizeof(UnicodeString) = 64 would leave space for
* (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
* UChars stored inside the object.
*
* The minimum object size on a 64-bit machine would be
* 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
* and the internal buffer would hold up to 11 UChars in that case.
*
* @see U16_MAX_LENGTH
* @draft ICU 56
*/
#ifndef UNISTR_OBJECT_SIZE
# define UNISTR_OBJECT_SIZE 64
#endif
/**
* UnicodeString is a string class that stores Unicode characters directly and provides
* similar functionality as the Java String and StringBuffer classes.
* similar functionality as the Java String and StringBuffer/StringBuilder classes.
* It is a concrete implementation of the abstract class Replaceable (for transliteration).
*
* The UnicodeString class is not suitable for subclassing.
*
* <p>For an overview of Unicode strings in C and C++ see the
* <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
* <a href="http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-">User Guide Strings chapter</a>.</p>
*
* <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
* A Unicode character may be stored with either one code unit
@ -3474,9 +3511,12 @@ private:
// constants
enum {
// Set the stack buffer size so that sizeof(UnicodeString) is,
// naturally (without padding), a multiple of sizeof(pointer).
US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
/**
* Size of stack buffer for short strings.
* Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
* @see UNISTR_OBJECT_SIZE
*/
US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR,
kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
kGrowSize=128, // grow size for this buffer
kInvalidHashCode=0, // invalid hash code
@ -3544,9 +3584,10 @@ private:
* (Padding at the end of fFields is ok:
* As long as it is no larger than fStackFields, it is not wasted space.)
*
* For some of the history of the UnicodeString class fields layout,
* see ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
* and ticket #8322 "why is sizeof(UnicodeString)==48?".
* For some of the history of the UnicodeString class fields layout, see
* - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
* - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
* - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
*/
// (implicit) *vtable;
union StackBufferOrFields {
@ -3558,9 +3599,11 @@ private:
} fStackFields;
struct {
int16_t fLengthAndFlags; // bit fields: see constants above
UChar *fArray; // the Unicode data
int32_t fCapacity; // capacity of fArray (in UChars)
int32_t fLength; // number of characters in fArray if >127; else undefined
int32_t fCapacity; // capacity of fArray (in UChars)
// array pointer last to minimize padding for machines with P128 data model
// or pointer sizes that are not a power of 2
UChar *fArray; // the Unicode data
} fFields;
} fUnion;
};

View file

@ -1,6 +1,6 @@
/*
******************************************************************************
* Copyright (C) 1999-2014, International Business Machines Corporation and
* Copyright (C) 1999-2015, International Business Machines Corporation and
* others. All Rights Reserved.
******************************************************************************
*
@ -367,8 +367,40 @@ UnicodeString::allocate(int32_t capacity) {
//========================================
// Destructor
//========================================
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
static u_atomic_int32_t finalLengthCounts[0x400]; // UnicodeString::kMaxShortLength+1
static u_atomic_int32_t beyondCount(0);
U_CAPI void unistr_printLengths() {
int32_t i;
for(i = 0; i <= 59; ++i) {
printf("%2d, %9d\n", i, (int32_t)finalLengthCounts[i]);
}
int32_t beyond = beyondCount;
for(; i < UPRV_LENGTHOF(finalLengthCounts); ++i) {
beyond += finalLengthCounts[i];
}
printf(">59, %9d\n", beyond);
}
#endif
UnicodeString::~UnicodeString()
{
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
// Count lengths of strings at the end of their lifetime.
// Useful for discussion of a desirable stack buffer size.
// Count the contents length, not the optional NUL terminator nor further capacity.
// Ignore open-buffer strings and strings which alias external storage.
if((fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kReadonlyAlias|kWritableAlias)) == 0) {
if(hasShortLength()) {
umtx_atomic_inc(finalLengthCounts + getShortLength());
} else {
umtx_atomic_inc(&beyondCount);
}
}
#endif
releaseArray();
}

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2013, International Business Machines Corporation and
* Copyright (c) 1997-2015, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/********************************************************************************
@ -73,6 +73,10 @@ void ctest_setICU_DATA(void);
static int gOrigArgc;
static const char* const * gOrigArgv;
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
U_CAPI void unistr_printLengths();
#endif
int main(int argc, const char* const argv[])
{
int nerrors = 0;
@ -233,6 +237,10 @@ int main(int argc, const char* const argv[])
} /* End of loop that repeats the entire test, if requested. (Normally doesn't loop) */
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
unistr_printLengths();
#endif
endTime = uprv_getRawUTCtime();
diffTime = (int32_t)(endTime - startTime);
printf("Elapsed Time: %02d:%02d:%02d.%03d\n",

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2014, International Business Machines Corporation and
* Copyright (c) 1997-2015, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -1166,6 +1166,10 @@ IntlTest::run_phase2( char* name, char* par ) // supports reporting memory leaks
# define TRY_CNV_2 "sjis"
#endif
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
U_CAPI void unistr_printLengths();
#endif
int
main(int argc, char* argv[])
{
@ -1524,6 +1528,10 @@ main(int argc, char* argv[])
u_cleanup();
}
#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
unistr_printLengths();
#endif
fprintf(stdout, "--------------------------------------\n");
if (execCount <= 0) {

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2014, International Business Machines Corporation and
* Copyright (c) 1997-2015, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -2098,20 +2098,35 @@ UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
void
UnicodeStringTest::TestSizeofUnicodeString() {
// See the comments in unistr.h near the declaration of UnicodeString's fields.
// See the API comments for UNISTR_OBJECT_SIZE.
size_t sizeofUniStr=sizeof(UnicodeString);
size_t expected;
switch(sizeof(void *)) {
case 4:
expected=32;
break;
case 8:
expected=40;
break;
default:
logln("This platform has neither 32-bit nor 64-bit pointers.");
return;
}
size_t expected=UNISTR_OBJECT_SIZE;
if(expected!=sizeofUniStr) {
errln("sizeof(UnicodeString)=%d, expected %d", (int)sizeofUniStr, (int)expected);
// Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
// of the compiler might add more internal padding than expected.
errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
(int)sizeofUniStr, (int)expected);
}
if(sizeofUniStr<32) {
errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
}
// We assume that the entire UnicodeString object,
// minus the vtable pointer and 2 bytes for flags and short length,
// is available for internal storage of UChars.
int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR;
UnicodeString s;
const UChar *emptyBuffer=s.getBuffer();
for(int32_t i=0; i<expectedStackBufferLength; ++i) {
s.append((UChar)0x2e);
}
const UChar *fullBuffer=s.getBuffer();
if(fullBuffer!=emptyBuffer) {
errln("unexpected reallocation when filling with assumed stack buffer size of %d",
expectedStackBufferLength);
}
const UChar *terminatedBuffer=s.getTerminatedBuffer();
if(terminatedBuffer==emptyBuffer) {
errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
expectedStackBufferLength);
}
}