mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-68 implement unicode string literals in c
X-SVN-Rev: 263
This commit is contained in:
parent
3dce1c03f5
commit
1569b465a5
4 changed files with 180 additions and 5 deletions
|
@ -1378,3 +1378,79 @@ const char* icu_getDefaultCodepage()
|
|||
|
||||
return DEFAULT_CONVERTER_NAME;
|
||||
}
|
||||
|
||||
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
||||
/*
|
||||
* These maps for ASCII to/from EBCDIC are from
|
||||
* "UTF-EBCDIC - EBCDIC-Friendly Unicode (or UCS) Transformation Format"
|
||||
* at http://www.unicode.org/unicode/reports/tr16/
|
||||
* but modified to explicitly exclude the variant graphical characters
|
||||
* that are in ASCII at 0xa0 and above.
|
||||
*/
|
||||
|
||||
/*
 * Lookup table indexed by an EBCDIC byte value; each entry is the
 * corresponding ASCII/ISO 8859-1 byte value.
 * Positions that are excluded from the mapping hold 0x00.
 * The table is only read, so it is const.
 */
static const uint8_t asciiFromEbcdic[256]={
    0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
    0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
    0x2D, 0x2F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5D, 0x00, 0x00,
    0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x5C, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9F
};
|
||||
|
||||
/*
 * Lookup table indexed by an ASCII/ISO 8859-1 byte value; each entry is the
 * corresponding EBCDIC byte value.
 * Indexes 0xa0 and above all hold 0, so those values are not converted.
 * The table is only read, so it is const.
 */
static const uint8_t ebcdicFromAscii[256]={
    0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
    0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
    0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
    0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
    0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
    0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
|
||||
#endif
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
u_charsToUChars(const char *cs, UChar *us, UTextOffset length) {
|
||||
while(length>0) {
|
||||
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
|
||||
*us++=(UChar)(uint8_t)(*cs++);
|
||||
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
||||
*us++=(UChar)asciiFromEbcdic[(uint8_t)(*cs++)];
|
||||
#else
|
||||
# error U_CHARSET_FAMILY is not valid
|
||||
#endif
|
||||
--length;
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
u_UCharsToChars(const UChar *us, char *cs, UTextOffset length) {
|
||||
while(length>0) {
|
||||
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
|
||||
*cs++=(char)(*us++);
|
||||
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
||||
*cs++=(char)ebcdicFromAscii[*us++];
|
||||
#else
|
||||
# error U_CHARSET_FAMILY is not valid
|
||||
#endif
|
||||
--length;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -158,7 +158,7 @@ U_CAPI const char* U_EXPORT2 icu_getDefaultLocaleID(void);
|
|||
*/
|
||||
U_CAPI double U_EXPORT2 icu_nextDouble(double d, bool_t positive);
|
||||
|
||||
/*
|
||||
/**
|
||||
* Filesystem file and path separator characters.
|
||||
* Example: '/' and ':' on Unix, '\\' and ';' on Windows.
|
||||
*/
|
||||
|
@ -179,4 +179,41 @@ U_CAPI double U_EXPORT2 icu_nextDouble(double d, bool_t positive);
|
|||
# define U_PATH_SEP_STRING ":"
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Convert char characters to UChar characters.
|
||||
* This utility function is useful only for "invariant characters"
|
||||
* that are encoded in the platform default encoding.
|
||||
* They are a small, constant subset of the encoding and include
|
||||
* just the latin letters, digits, and some punctuation.
|
||||
* For details, see utypes.h .
|
||||
*
|
||||
* @param cs Input string, points to <code>length</code>
|
||||
* character bytes from a subset of the platform encoding.
|
||||
* @param us Output string, points to memory for <code>length</code>
|
||||
* Unicode characters.
|
||||
* @param length The number of characters to convert; this may
|
||||
* include the terminating <code>NUL</code>.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_charsToUChars(const char *cs, UChar *us, UTextOffset length);
|
||||
|
||||
/**
|
||||
* Convert UChar characters to char characters.
|
||||
* This utility function is useful only for "invariant characters"
|
||||
* that can be encoded in the platform default encoding.
|
||||
* They are a small, constant subset of the encoding and include
|
||||
* just the latin letters, digits, and some punctuation.
|
||||
* For details, see utypes.h .
|
||||
*
|
||||
* @param us Input string, points to <code>length</code>
|
||||
* Unicode characters that can be encoded with the
|
||||
* codepage-invariant subset of the platform encoding.
|
||||
* @param cs Output string, points to memory for <code>length</code>
|
||||
* character bytes.
|
||||
* @param length The number of characters to convert; this may
|
||||
* include the terminating <code>NUL</code>.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_UCharsToChars(const UChar *us, char *cs, UTextOffset length);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -158,9 +158,51 @@ U_CAPI UChar* U_EXPORT2 u_uastrncpy(UChar *ucs1,
|
|||
*/
|
||||
U_CAPI char* U_EXPORT2 u_austrcpy(char *s1,
|
||||
const UChar *us2 );
|
||||
|
||||
/**
|
||||
* Unicode String literals in C.
|
||||
* We need one macro to declare a variable for the string
|
||||
* and to statically preinitialize it if possible,
|
||||
* and a second macro to dynamically initialize such a string variable if necessary.
|
||||
*
|
||||
* The macros are defined for maximum performance.
|
||||
* They work only for strings that contain "invariant characters", i.e.,
|
||||
* only latin letters, digits, and some punctuation.
|
||||
* See utypes.h for details.
|
||||
*
|
||||
* A pair of macros for a single string must be used with the same
|
||||
* parameters.
|
||||
* The string parameter must be a C string literal.
|
||||
* The length of the string, not including the terminating
|
||||
* <code>NUL</code>, must be specified as a constant.
|
||||
* The U_STRING_DECL macro should be invoked exactly once for one
|
||||
* such string variable before it is used.
|
||||
*
|
||||
* Usage:
|
||||
* <pre>
|
||||
*   U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
|
||||
*   U_STRING_DECL(ustringVar2, "jumps 5%", 8);
|
||||
*   static bool_t didInit=FALSE;
|
||||
*  
|
||||
*   int32_t function() {
|
||||
*   if(!didInit) {
|
||||
*   U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
|
||||
*   U_STRING_INIT(ustringVar2, "jumps 5%", 8);
|
||||
*   didInit=TRUE;
|
||||
*   }
|
||||
*   return u_strcmp(ustringVar1, ustringVar2);
|
||||
*   }
|
||||
* </pre>
|
||||
*/
|
||||
#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
    /*
     * wchar_t has the same size as UChar and the charset is ASCII-based:
     * initialize the array statically from the wide string literal itself.
     * (Assumes UChar is compatible with wchar_t on such platforms.)
     * U_STRING_INIT has nothing left to do.
     */
#   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
#   define U_STRING_INIT(var, cs, length)
#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
    /*
     * UChar is one byte wide and the charset is ASCII-based:
     * initialize the array statically from the narrow string literal.
     * U_STRING_INIT has nothing left to do.
     */
#   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
#   define U_STRING_INIT(var, cs, length)
#else
    /*
     * No static initialization is possible: the array must be writable
     * (not const) because U_STRING_INIT fills it at run time, converting
     * length characters plus the terminating NUL.
     */
#   define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
#   define U_STRING_INIT(var, cs, length) u_charsToUChars((cs), (var), (length)+1)
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "putil.h"
|
||||
#include "intltest.h"
|
||||
#include "strtest.h"
|
||||
#include "ustring.h"
|
||||
|
||||
void StringTest::TestEndian() {
|
||||
union {
|
||||
|
@ -49,6 +50,8 @@ void StringTest::TestCharsetFamily() {
|
|||
}
|
||||
}
|
||||
|
||||
U_STRING_DECL(ustringVar, "aZ0 -", 5);
|
||||
|
||||
void StringTest::runIndexedTest(int32_t index, bool_t exec, char *&name, char *par) {
|
||||
if(exec) {
|
||||
logln("TestSuite Character and String Test: ");
|
||||
|
@ -72,6 +75,23 @@ void StringTest::runIndexedTest(int32_t index, bool_t exec, char *&name, char *p
|
|||
TestCharsetFamily();
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
name="Test_U_STRING";
|
||||
if(exec) {
|
||||
U_STRING_INIT(ustringVar, "aZ0 -", 5);
|
||||
if( sizeof(ustringVar)/sizeof(*ustringVar)!=6 ||
|
||||
ustringVar[0]!=0x61 ||
|
||||
ustringVar[1]!=0x5a ||
|
||||
ustringVar[2]!=0x30 ||
|
||||
ustringVar[3]!=0x20 ||
|
||||
ustringVar[4]!=0x2d ||
|
||||
ustringVar[5]!=0
|
||||
) {
|
||||
errln("Test_U_STRING: U_STRING_DECL with U_STRING_INIT does not work right! "
|
||||
"See putil.h and utypes.h with platform.h.");
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
name="";
|
||||
break;
|
||||
|
|
Loading…
Add table
Reference in a new issue