From 2cc78d6fce2929f8e10c400b33739e087831278b Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 11 Mar 2009 23:57:47 +0000 Subject: [PATCH] ICU-6606 split UnicodeString methods so that implicitly using the default codepage does not have to depend on ucnv_* functions if the default is UTF-8 X-SVN-Rev: 25554 --- icu4c/source/common/unicode/unistr.h | 57 +++++++++++++++++++++++++--- icu4c/source/common/unistr_cnv.cpp | 31 ++++++++++++++- 2 files changed, 80 insertions(+), 8 deletions(-) diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 3f2bcdef63f..9a28251d757 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -1448,6 +1448,34 @@ public: int32_t targetCapacity, enum EInvariant inv) const; +#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION + + /** + * Copy the characters in the range + * [start, start + startLength) into an array of characters + * in the platform's default codepage. + * This function does not write any more than targetLength + * characters but returns the length of the entire output string + * so that one can allocate a larger buffer and call the function again + * if necessary. + * The output string is NUL-terminated if possible. + * + * @param start offset of first character which will be copied + * @param startLength the number of characters to extract + * @param target the target buffer for extraction + * @param targetLength the length of the target buffer + * If target is NULL, then the number of bytes required for + * target is returned. 
+ * @return the output string length, not including the terminating NUL + * @stable ICU 2.0 + */ + int32_t extract(int32_t start, + int32_t startLength, + char *target, + uint32_t targetLength) const; + +#endif + #if !UCONFIG_NO_CONVERSION /** @@ -1513,7 +1541,7 @@ public: int32_t startLength, char *target, uint32_t targetLength, - const char *codepage = 0) const; + const char *codepage) const; /** * Convert the UnicodeString into a codepage string using an existing UConverter. @@ -2811,6 +2839,26 @@ public: */ UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); +#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION + + /** + * char* constructor. + * @param codepageData an array of bytes, null-terminated, + * in the platform's default codepage. + * @stable ICU 2.0 + */ + UnicodeString(const char *codepageData); + + /** + * char* constructor. + * @param codepageData an array of bytes in the platform's default codepage. + * @param dataLength The number of bytes in codepageData. + * @stable ICU 2.0 + */ + UnicodeString(const char *codepageData, int32_t dataLength); + +#endif + #if !UCONFIG_NO_CONVERSION /** @@ -2830,8 +2878,7 @@ public: * * @stable ICU 2.0 */ - UnicodeString(const char *codepageData, - const char *codepage = 0); + UnicodeString(const char *codepageData, const char *codepage); /** * char* constructor. @@ -2850,9 +2897,7 @@ public: * * @stable ICU 2.0 */ - UnicodeString(const char *codepageData, - int32_t dataLength, - const char *codepage = 0); + UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage); /** * char * / UConverter constructor. 
diff --git a/icu4c/source/common/unistr_cnv.cpp b/icu4c/source/common/unistr_cnv.cpp index 1a6819e0d66..228dbb77cef 100644 --- a/icu4c/source/common/unistr_cnv.cpp +++ b/icu4c/source/common/unistr_cnv.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1999-2007, International Business Machines +* Copyright (C) 1999-2009, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -26,6 +26,7 @@ #include "unicode/ustring.h" #include "unicode/unistr.h" #include "unicode/ucnv.h" +#include "ucnv_imp.h" #include "putilimp.h" #include "ustr_cnv.h" #include "ustr_imp.h" @@ -36,6 +37,25 @@ U_NAMESPACE_BEGIN // Constructors //======================================== +UnicodeString::UnicodeString(const char *codepageData) + : fShortLength(0), + fFlags(kShortString) +{ + if(codepageData != 0) { + doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), 0); + } +} + +UnicodeString::UnicodeString(const char *codepageData, + int32_t dataLength) + : fShortLength(0), + fFlags(kShortString) +{ + if(codepageData != 0) { + doCodepageCreate(codepageData, dataLength, 0); + } +} + UnicodeString::UnicodeString(const char *codepageData, const char *codepage) : fShortLength(0), @@ -46,7 +66,6 @@ UnicodeString::UnicodeString(const char *codepageData, } } - UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage) @@ -98,6 +117,14 @@ UnicodeString::UnicodeString(const char *src, int32_t srcLength, //======================================== // Codeset conversion //======================================== +int32_t +UnicodeString::extract(int32_t start, + int32_t length, + char *target, + uint32_t dstSize) const { + return extract(start, length, target, dstSize, 0); +} + int32_t UnicodeString::extract(int32_t start, int32_t length,