diff --git a/icu4c/source/common/putil.c b/icu4c/source/common/putil.c
index 39fdf6e8716..34d7cdf91f4 100644
--- a/icu4c/source/common/putil.c
+++ b/icu4c/source/common/putil.c
@@ -1378,3 +1378,79 @@ const char* icu_getDefaultCodepage()
return DEFAULT_CONVERTER_NAME;
}
+
+#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+/*
+ * These maps for ASCII to/from EBCDIC are from
+ * "UTF-EBCDIC - EBCDIC-Friendly Unicode (or UCS) Transformation Format"
+ * at http://www.unicode.org/unicode/reports/tr16/
+ * but modified to explicitly exclude the variant graphical characters
+ * that would map to code points 0xa0 and above (outside 7-bit ASCII).
+ */
+
+/*
+ * Lookup table indexed by an EBCDIC byte value; each entry is the
+ * corresponding ASCII/control code.
+ * Entries of 0x00 at a non-zero index are EBCDIC bytes that this table
+ * deliberately leaves unmapped.
+ * The table is only ever read, so it is declared const.
+ */
+static const uint8_t asciiFromEbcdic[256]={
+    0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+    0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
+    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
+    0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
+    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
+    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
+    0x2D, 0x2F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
+    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5D, 0x00, 0x00,
+    0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x5C, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9F
+};
+
+/*
+ * Lookup table indexed by a code point 0..0xff; each entry is the
+ * corresponding EBCDIC byte.
+ * All entries for indexes 0xa0..0xff are 0, i.e. those code points are
+ * not mapped.
+ * The table has exactly 256 entries, so callers must not index it with a
+ * value above 0xff.
+ * The table is only ever read, so it is declared const.
+ */
+static const uint8_t ebcdicFromAscii[256]={
+    0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+    0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
+    0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
+    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
+    0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
+    0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
+    0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
+    0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
+    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
+    0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+#endif
+
+/*
+ * Convert length platform chars from cs into UChars stored at us.
+ * On ASCII-family platforms each byte is widened as an unsigned value;
+ * on EBCDIC-family platforms each byte is translated through
+ * asciiFromEbcdic[]. Nothing is written when length is not positive,
+ * and no terminator is appended.
+ */
+U_CAPI void U_EXPORT2
+u_charsToUChars(const char *cs, UChar *us, UTextOffset length) {
+    UTextOffset i;
+
+    for(i=0; i<length; ++i) {
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+        /* go through uint8_t so that a signed char is not sign-extended */
+        us[i]=(UChar)(uint8_t)cs[i];
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+        us[i]=(UChar)asciiFromEbcdic[(uint8_t)cs[i]];
+#else
+#   error U_CHARSET_FAMILY is not valid
+#endif
+    }
+}
+
+/*
+ * Convert length UChars from us into platform chars stored at cs.
+ * Only meaningful for invariant characters. Exactly length chars are
+ * written when length is positive, and no terminator is appended.
+ *
+ * On ASCII-family platforms each UChar is narrowed with a cast.
+ * On EBCDIC-family platforms each UChar is translated through
+ * ebcdicFromAscii[]; that table has only 256 entries, so a code unit
+ * above 0xff is written as 0 (the same value the table uses for
+ * unmapped entries) instead of reading past the end of the table.
+ */
+U_CAPI void U_EXPORT2
+u_UCharsToChars(const UChar *us, char *cs, UTextOffset length) {
+    while(length>0) {
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+        *cs++=(char)(*us++);
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+        UChar u=*us++;
+
+        /* bounds-check: UChar is wider than the 8-bit table index */
+        *cs++=(char)(u<=0xff ? ebcdicFromAscii[u] : 0);
+#else
+#   error U_CHARSET_FAMILY is not valid
+#endif
+        --length;
+    }
+}
diff --git a/icu4c/source/common/putil.h b/icu4c/source/common/putil.h
index 6a623aad11e..495f862aba6 100644
--- a/icu4c/source/common/putil.h
+++ b/icu4c/source/common/putil.h
@@ -158,7 +158,7 @@ U_CAPI const char* U_EXPORT2 icu_getDefaultLocaleID(void);
*/
U_CAPI double U_EXPORT2 icu_nextDouble(double d, bool_t positive);
-/*
+/**
* Filesystem file and path separator characters.
* Example: '/' and ':' on Unix, '\\' and ';' on Windows.
*/
@@ -179,4 +179,41 @@ U_CAPI double U_EXPORT2 icu_nextDouble(double d, bool_t positive);
# define U_PATH_SEP_STRING ":"
#endif
+/**
+ * Convert char characters to UChar characters.
+ * This utility function is useful only for "invariant characters"
+ * that are encoded in the platform default encoding.
+ * They are a small, constant subset of the encoding and include
+ * just the latin letters, digits, and some punctuation.
+ * For details, see utypes.h .
+ *
+ * Exactly length characters are converted; the function itself
+ * does not append a terminator.
+ *
+ * @param cs Input string, points to length
+ * character bytes from a subset of the platform encoding.
+ * @param us Output string, points to memory for length
+ * Unicode characters.
+ * @param length The number of characters to convert; this may
+ * include the terminating <code>NUL</code>.
+ */
+U_CAPI void U_EXPORT2
+u_charsToUChars(const char *cs, UChar *us, UTextOffset length);
+
+/**
+ * Convert UChar characters to char characters.
+ * This utility function is useful only for "invariant characters"
+ * that can be encoded in the platform default encoding.
+ * They are a small, constant subset of the encoding and include
+ * just the latin letters, digits, and some punctuation.
+ * For details, see utypes.h .
+ *
+ * Exactly length characters are converted; the function itself
+ * does not append a terminator.
+ *
+ * @param us Input string, points to length
+ * Unicode characters that can be encoded with the
+ * codepage-invariant subset of the platform encoding.
+ * @param cs Output string, points to memory for length
+ * character bytes.
+ * @param length The number of characters to convert; this may
+ * include the terminating <code>NUL</code>.
+ */
+U_CAPI void U_EXPORT2
+u_UCharsToChars(const UChar *us, char *cs, UTextOffset length);
+
#endif
diff --git a/icu4c/source/common/ustring.h b/icu4c/source/common/ustring.h
index 595e4490f8c..48938586120 100644
--- a/icu4c/source/common/ustring.h
+++ b/icu4c/source/common/ustring.h
@@ -158,9 +158,51 @@ U_CAPI UChar* U_EXPORT2 u_uastrncpy(UChar *ucs1,
*/
U_CAPI char* U_EXPORT2 u_austrcpy(char *s1,
const UChar *us2 );
+
+/**
+ * Unicode String literals in C.
+ * We need one macro to declare a variable for the string
+ * and to statically preinitialize it if possible,
+ * and a second macro to dynamically initialize such a string variable if necessary.
+ *
+ * The macros are defined for maximum performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * A pair of macros for a single string must be used with the same
+ * parameters.
+ * The string parameter must be a C string literal.
+ * The length of the string, not including the terminating
+ * <code>NUL</code>, must be specified as a constant.
+ * The U_STRING_DECL macro should be invoked exactly once for one
+ * such string variable before it is used.
+ *
+ * Where static initialization is possible, U_STRING_DECL initializes the
+ * array directly from the string literal and U_STRING_INIT expands to
+ * nothing. Otherwise U_STRING_DECL declares a writable array and
+ * U_STRING_INIT fills it at runtime with u_charsToUChars().
+ *
+ * NOTE(review): the static initializers need UChar to be compatible with
+ * wchar_t (first branch) or a character type (second branch), not merely
+ * the same size -- confirm against the UChar typedef in utypes.h.
+ *
+ * Usage:
+ * <pre>
+ *     U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
+ *     U_STRING_DECL(ustringVar2, "jumps 5%", 8);
+ *     static bool_t didInit=FALSE;
+ *
+ *     int32_t function() {
+ *         if(!didInit) {
+ *             U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
+ *             U_STRING_INIT(ustringVar2, "jumps 5%", 8);
+ *             didInit=TRUE;
+ *         }
+ *         return u_strcmp(ustringVar1, ustringVar2);
+ *     }
+ * </pre>
+ */
+#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
+#   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
+#   define U_STRING_INIT(var, cs, length)
+#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
+#   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
+#   define U_STRING_INIT(var, cs, length)
+#else
+#   define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
+#   define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, (length)+1)
 #endif
-
-
-
-
+#endif
diff --git a/icu4c/source/test/intltest/strtest.cpp b/icu4c/source/test/intltest/strtest.cpp
index 0926de1cc33..de4bcfa0384 100644
--- a/icu4c/source/test/intltest/strtest.cpp
+++ b/icu4c/source/test/intltest/strtest.cpp
@@ -22,6 +22,7 @@
 #include "putil.h"
 #include "intltest.h"
 #include "strtest.h"
+#include "ustring.h"
 
 void StringTest::TestEndian() {
     union {
@@ -49,6 +50,8 @@ void StringTest::TestCharsetFamily() {
     }
 }
 
+U_STRING_DECL(ustringVar, "aZ0 -", 5);
+
 void StringTest::runIndexedTest(int32_t index, bool_t exec, char *&name, char *par) {
     if(exec) {
         logln("TestSuite Character and String Test: ");
@@ -72,6 +75,23 @@ void StringTest::runIndexedTest(int32_t index, bool_t exec, char *&name, char *p
             TestCharsetFamily();
         }
         break;
+    case 3:
+        name="Test_U_STRING";
+        if(exec) {
+            U_STRING_INIT(ustringVar, "aZ0 -", 5);
+            if( sizeof(ustringVar)/sizeof(*ustringVar)!=6 ||
+                ustringVar[0]!=0x61 ||
+                ustringVar[1]!=0x5a ||
+                ustringVar[2]!=0x30 ||
+                ustringVar[3]!=0x20 ||
+                ustringVar[4]!=0x2d ||
+                ustringVar[5]!=0
+            ) {
+                errln("Test_U_STRING: U_STRING_DECL with U_STRING_INIT does not work right! "
+                      "See putil.h and utypes.h with platform.h.");
+            }
+        }
+        break;
     default:
         name="";
         break;