From 2cc78d6fce2929f8e10c400b33739e087831278b Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Wed, 11 Mar 2009 23:57:47 +0000
Subject: [PATCH] ICU-6606 split UnicodeString methods so that implicitly using
 the default codepage does not have to depend on ucnv_* functions if the
 default is UTF-8

X-SVN-Rev: 25554
---
 icu4c/source/common/unicode/unistr.h | 57 +++++++++++++++++++++++++---
 icu4c/source/common/unistr_cnv.cpp   | 31 ++++++++++++++-
 2 files changed, 80 insertions(+), 8 deletions(-)
diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h
index 3f2bcdef63f..9a28251d757 100644
--- a/icu4c/source/common/unicode/unistr.h
+++ b/icu4c/source/common/unicode/unistr.h
@@ -1448,6 +1448,34 @@ public:
            int32_t targetCapacity,
            enum EInvariant inv) const;
 
+#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
+
+  /**
+   * Copy the characters in the range
+   * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
+   * in the platform's default codepage.
+   * This function does not write any more than <code>targetLength</code>
+   * characters but returns the length of the entire output string
+   * so that one can allocate a larger buffer and call the function again
+   * if necessary.
+   * The output string is NUL-terminated if possible.
+   *
+   * @param start offset of first character which will be copied
+   * @param startLength the number of characters to extract
+   * @param target the target buffer for extraction
+   * @param targetLength the length of the target buffer
+   * If <TT>target</TT> is NULL, then the number of bytes required for
+   * <TT>target</TT> is returned.
+   * @return the output string length, not including the terminating NUL
+   * @stable ICU 2.0
+   */
+  int32_t extract(int32_t start,
+           int32_t startLength,
+           char *target,
+           uint32_t targetLength) const;
+
+#endif
+
 #if !UCONFIG_NO_CONVERSION
 
   /**
@@ -1513,7 +1541,7 @@ public:
            int32_t startLength,
            char *target,
            uint32_t targetLength,
-           const char *codepage = 0) const;
+           const char *codepage) const;
 
   /**
    * Convert the UnicodeString into a codepage string using an existing UConverter.
@@ -2811,6 +2839,26 @@ public:
    */
   UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
 
+#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
+
+  /**
+   * char* constructor.
+   * @param codepageData an array of bytes, null-terminated,
+   *                     in the platform's default codepage.
+   * @stable ICU 2.0
+   */
+  UnicodeString(const char *codepageData);
+
+  /**
+   * char* constructor.
+   * @param codepageData an array of bytes in the platform's default codepage.
+   * @param dataLength The number of bytes in <TT>codepageData</TT>.
+   * @stable ICU 2.0
+   */
+  UnicodeString(const char *codepageData, int32_t dataLength);
+
+#endif
+
 #if !UCONFIG_NO_CONVERSION
 
   /**
@@ -2830,8 +2878,7 @@ public:
    *
    * @stable ICU 2.0
    */
-  UnicodeString(const char *codepageData,
-        const char *codepage = 0);
+  UnicodeString(const char *codepageData, const char *codepage);
 
   /**
    * char* constructor.
@@ -2850,9 +2897,7 @@ public:
    *
    * @stable ICU 2.0
    */
-  UnicodeString(const char *codepageData,
-        int32_t dataLength,
-        const char *codepage = 0);
+  UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
 
   /**
    * char * / UConverter constructor.
diff --git a/icu4c/source/common/unistr_cnv.cpp b/icu4c/source/common/unistr_cnv.cpp
index 1a6819e0d66..228dbb77cef 100644
--- a/icu4c/source/common/unistr_cnv.cpp
+++ b/icu4c/source/common/unistr_cnv.cpp
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1999-2007, International Business Machines
+*   Copyright (C) 1999-2009, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@@ -26,6 +26,7 @@
 #include "unicode/ustring.h"
 #include "unicode/unistr.h"
 #include "unicode/ucnv.h"
+#include "ucnv_imp.h"
 #include "putilimp.h"
 #include "ustr_cnv.h"
 #include "ustr_imp.h"
@@ -36,6 +37,25 @@ U_NAMESPACE_BEGIN
 // Constructors
 //========================================
 
+UnicodeString::UnicodeString(const char *codepageData)
+  : fShortLength(0),
+    fFlags(kShortString)
+{
+    if(codepageData != 0) {
+        doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), 0);
+    }
+}
+
+UnicodeString::UnicodeString(const char *codepageData,
+                             int32_t dataLength)
+  : fShortLength(0),
+    fFlags(kShortString)
+{
+    if(codepageData != 0) {
+        doCodepageCreate(codepageData, dataLength, 0);
+    }
+}
+
 UnicodeString::UnicodeString(const char *codepageData,
                              const char *codepage)
   : fShortLength(0),
@@ -46,7 +66,6 @@ UnicodeString::UnicodeString(const char *codepageData,
     }
 }
 
-
 UnicodeString::UnicodeString(const char *codepageData,
                              int32_t dataLength,
                              const char *codepage)
@@ -98,6 +117,14 @@ UnicodeString::UnicodeString(const char *src, int32_t srcLength,
 //========================================
 // Codeset conversion
 //========================================
+int32_t
+UnicodeString::extract(int32_t start,
+                       int32_t length,
+                       char *target,
+                       uint32_t dstSize) const {
+    return extract(start, length, target, dstSize, 0);
+}
+
 int32_t
 UnicodeString::extract(int32_t start,
                        int32_t length,