ICU-68 implement unicode string literals in c

X-SVN-Rev: 263
This commit is contained in:
Markus Scherer 1999-11-30 23:25:49 +00:00
parent 3dce1c03f5
commit 1569b465a5
4 changed files with 180 additions and 5 deletions

View file

@ -1378,3 +1378,79 @@ const char* icu_getDefaultCodepage()
return DEFAULT_CONVERTER_NAME;
}
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
/*
* These maps for ASCII to/from EBCDIC are from
* "UTF-EBCDIC - EBCDIC-Friendly Unicode (or UCS) Transformation Format"
* at http://www.unicode.org/unicode/reports/tr16/
* but modified to explicitly exclude the variant graphical characters
* that are at 0xa0 and above in 8-bit extensions of ASCII (e.g. ISO 8859-1).
*/
/*
 * Lookup table indexed by an EBCDIC byte value (0..255) that yields the
 * corresponding ASCII/Unicode code point.
 * Apart from index 0 (NUL maps to NUL), an entry of 0x00 appears to mark an
 * EBCDIC byte that has no mapping in this table.
 * The table is only ever read, so it is declared const.
 */
static const uint8_t asciiFromEbcdic[256]={
    0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
    0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
    0x2D, 0x2F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5D, 0x00, 0x00,
    0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x5C, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9F
};
/*
 * Lookup table indexed by an ASCII/Unicode code point (0..255) that yields
 * the corresponding EBCDIC byte value.
 * Indexes 0x00..0x9F carry mappings; every entry from 0xA0 upward is 0,
 * i.e. those code points have no mapping in this table.
 * The table is only ever read, so it is declared const.
 */
static const uint8_t ebcdicFromAscii[256]={
    0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
    0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
    0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
    0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
    0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
    0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
#endif
/*
 * Widen length platform-charset bytes from cs into UChars stored in us.
 * On ASCII-family platforms each byte value is used directly; on
 * EBCDIC-family platforms each byte is translated through asciiFromEbcdic.
 * Nothing is written when length is zero or negative, and no terminator is
 * appended beyond the length elements that are converted.
 */
U_CAPI void U_EXPORT2
u_charsToUChars(const char *cs, UChar *us, UTextOffset length) {
    UTextOffset i;

    for(i=0; i<length; ++i) {
        /* go through uint8_t so that a signed char never becomes a negative value/index */
        uint8_t b=(uint8_t)cs[i];
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
        us[i]=(UChar)b;
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
        us[i]=(UChar)asciiFromEbcdic[b];
#else
#   error U_CHARSET_FAMILY is not valid
#endif
    }
}
/*
 * Narrow length UChars from us into platform-charset bytes stored in cs.
 * On ASCII-family platforms each code unit is simply cast to char; on
 * EBCDIC-family platforms it is translated through ebcdicFromAscii.
 * Nothing is written when length is zero or negative, and no terminator is
 * appended beyond the length elements that are converted.
 *
 * Only "invariant characters" are meaningful input. On EBCDIC platforms a
 * code unit above 0xff cannot be looked up in the 256-entry table; it is
 * written as 0 (the table's own "no mapping" value) instead of being used
 * as an out-of-bounds index.
 */
U_CAPI void U_EXPORT2
u_UCharsToChars(const UChar *us, char *cs, UTextOffset length) {
    while(length>0) {
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
        *cs++=(char)(*us++);
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
        UChar u=*us++;

        /* UChar is wider than the table index range: guard the lookup */
        *cs++=(char)(u<=0xff ? ebcdicFromAscii[u] : 0);
#else
#   error U_CHARSET_FAMILY is not valid
#endif
        --length;
    }
}

View file

@ -158,7 +158,7 @@ U_CAPI const char* U_EXPORT2 icu_getDefaultLocaleID(void);
*/
U_CAPI double U_EXPORT2 icu_nextDouble(double d, bool_t positive);
/*
/**
* Filesystem file and path separator characters.
* Example: '/' and ':' on Unix, '\\' and ';' on Windows.
*/
@ -179,4 +179,41 @@ U_CAPI double U_EXPORT2 icu_nextDouble(double d, bool_t positive);
# define U_PATH_SEP_STRING ":"
#endif
/**
 * Convert char characters to UChar characters.
 * This utility function is useful only for "invariant characters"
 * that are encoded in the platform default encoding.
 * They are a small, constant subset of the encoding and include
 * just the latin letters, digits, and some punctuation.
 * For details, see utypes.h .
 *
 * Exactly <code>length</code> elements are converted; no terminator is
 * appended, so include the <code>NUL</code> in <code>length</code> if the
 * output must be terminated. Nothing is written if <code>length</code>
 * is zero or negative. The result for characters outside the invariant
 * set is not meaningful.
 *
 * @param cs Input string, points to <code>length</code>
 * character bytes from a subset of the platform encoding.
 * @param us Output string, points to memory for <code>length</code>
 * Unicode characters.
 * @param length The number of characters to convert; this may
 * include the terminating <code>NUL</code>.
 */
U_CAPI void U_EXPORT2
u_charsToUChars(const char *cs, UChar *us, UTextOffset length);
/**
 * Convert UChar characters to char characters.
 * This utility function is useful only for "invariant characters"
 * that can be encoded in the platform default encoding.
 * They are a small, constant subset of the encoding and include
 * just the latin letters, digits, and some punctuation.
 * For details, see utypes.h .
 *
 * Exactly <code>length</code> elements are converted; no terminator is
 * appended, so include the <code>NUL</code> in <code>length</code> if the
 * output must be terminated. Nothing is written if <code>length</code>
 * is zero or negative. Callers must pass only invariant characters;
 * the result for any other input is not meaningful.
 *
 * @param us Input string, points to <code>length</code>
 * Unicode characters that can be encoded with the
 * codepage-invariant subset of the platform encoding.
 * @param cs Output string, points to memory for <code>length</code>
 * character bytes.
 * @param length The number of characters to convert; this may
 * include the terminating <code>NUL</code>.
 */
U_CAPI void U_EXPORT2
u_UCharsToChars(const UChar *us, char *cs, UTextOffset length);
#endif

View file

@ -158,9 +158,51 @@ U_CAPI UChar* U_EXPORT2 u_uastrncpy(UChar *ucs1,
*/
U_CAPI char* U_EXPORT2 u_austrcpy(char *s1,
const UChar *us2 );
/**
* Unicode String literals in C.
* We need one macro to declare a variable for the string
* and to statically preinitialize it if possible,
* and a second macro to dynamically initialize such a string variable if necessary.
*
* The macros are defined for maximum performance.
* They work only for strings that contain "invariant characters", i.e.,
* only latin letters, digits, and some punctuation.
* See utypes.h for details.
*
* A pair of macros for a single string must be used with the same
* parameters.
* The string parameter must be a C string literal.
* The length of the string, not including the terminating
* <code>NUL</code>, must be specified as a constant.
* The U_STRING_DECL macro should be invoked exactly once for one
* such string variable before it is used.
*
* Usage:
* <pre>
* &#32; U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
* &#32; U_STRING_DECL(ustringVar2, "jumps 5%", 8);
* &#32; static bool_t didInit=FALSE;
* &#32;
* &#32; int32_t function() {
* &#32; if(!didInit) {
* &#32; U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
* &#32; U_STRING_INIT(ustringVar2, "jumps 5%", 8);
* &#32; didInit=TRUE;
* &#32; }
* &#32; return u_strcmp(ustringVar1, ustringVar2);
* &#32; }
* </pre>
*/
#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
/*
 * Wide string literals have UChar-sized units on an ASCII platform:
 * initialize the array statically, directly from the wide literal.
 * An array can only be initialized from a string literal itself, not from a
 * pointer cast of it, so the literal is used without a cast.
 * NOTE(review): this requires UChar to be type-compatible with wchar_t on
 * the platforms that select this branch - confirm against utypes.h.
 */
# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
# define U_STRING_INIT(var, cs, length)
#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
/*
 * UChar is a single byte on an ASCII platform:
 * initialize the array statically, directly from the narrow literal.
 * NOTE(review): this requires UChar to be a character type - confirm
 * against utypes.h.
 */
# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
# define U_STRING_INIT(var, cs, length)
#else
/*
 * No static initialization possible: declare a writable array (it must not
 * be const because U_STRING_INIT fills it at runtime) and convert the
 * literal, including its terminating NUL, with u_charsToUChars().
 */
# define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
# define U_STRING_INIT(var, cs, length) u_charsToUChars((cs), (var), (length)+1)
#endif
#endif

View file

@ -22,6 +22,7 @@
#include "putil.h"
#include "intltest.h"
#include "strtest.h"
#include "ustring.h"
void StringTest::TestEndian() {
union {
@ -49,6 +50,8 @@ void StringTest::TestCharsetFamily() {
}
}
U_STRING_DECL(ustringVar, "aZ0 -", 5);
void StringTest::runIndexedTest(int32_t index, bool_t exec, char *&name, char *par) {
if(exec) {
logln("TestSuite Character and String Test: ");
@ -72,6 +75,23 @@ void StringTest::runIndexedTest(int32_t index, bool_t exec, char *&name, char *p
TestCharsetFamily();
}
break;
case 3:
name="Test_U_STRING";
if(exec) {
U_STRING_INIT(ustringVar, "aZ0 -", 5);
if( sizeof(ustringVar)/sizeof(*ustringVar)!=6 ||
ustringVar[0]!=0x61 ||
ustringVar[1]!=0x5a ||
ustringVar[2]!=0x30 ||
ustringVar[3]!=0x20 ||
ustringVar[4]!=0x2d ||
ustringVar[5]!=0
) {
errln("Test_U_STRING: U_STRING_DECL with U_STRING_INIT does not work right! "
"See putil.h and utypes.h with platform.h.");
}
}
break;
default:
name="";
break;