mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-09 15:27:38 +00:00
ICU-13052 Update ICU4C branch to current trunk
X-SVN-Rev: 39899
This commit is contained in:
parent
970b7293de
commit
f458acca1b
11 changed files with 656 additions and 11 deletions
|
@ -1116,7 +1116,7 @@ uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UEr
|
|||
// Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
|
||||
tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
|
||||
if (tmpLen > 1) {
|
||||
int i = 0;
|
||||
int32_t i = 0;
|
||||
// Only need to look up in the table if the name contains '_', e.g. for de-de_phoneb-type alternate sorts.
|
||||
bLookup = FALSE;
|
||||
for (i = 0; i < UPRV_LENGTHOF(locName); i++)
|
||||
|
@ -1253,7 +1253,7 @@ uprv_convertToLCIDPlatform(const char* localeID)
|
|||
{
|
||||
// Need it to be UTF-16, not 8-bit
|
||||
wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
|
||||
int i;
|
||||
int32_t i;
|
||||
for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++)
|
||||
{
|
||||
if (asciiBCP47Tag[i] == '\0')
|
||||
|
|
|
@ -1781,7 +1781,7 @@ The leftmost codepage (.xxx) wins.
|
|||
// First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
|
||||
char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH];
|
||||
|
||||
int i;
|
||||
int32_t i;
|
||||
for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++)
|
||||
{
|
||||
if (windowsLocale[i] == '_')
|
||||
|
|
|
@ -117,7 +117,7 @@
|
|||
// First we need to go from char to UTF-16
|
||||
// u_charsToUChars could work but it requires length.
|
||||
WCHAR utf16Path[MAX_PATH];
|
||||
int i;
|
||||
int32_t i;
|
||||
for (i = 0; i < UPRV_LENGTHOF(utf16Path); i++)
|
||||
{
|
||||
utf16Path[i] = path[i];
|
||||
|
|
|
@ -498,7 +498,7 @@
|
|||
# define U_CPLUSPLUS_VERSION 1
|
||||
#endif
|
||||
|
||||
#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
|
||||
#if (U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
|
||||
// add in std::nullptr_t
|
||||
namespace std {
|
||||
typedef decltype(nullptr) nullptr_t;
|
||||
|
|
|
@ -208,10 +208,27 @@ endif
|
|||
%.o: $(srcdir)/%.c
|
||||
$(COMPILE.c) $(DYNAMICCPPFLAGS) $(DYNAMICCFLAGS) -o $@ $<
|
||||
|
||||
# This causes escapesrc to be built before other ICU targets.
|
||||
NEED_ESCAPING=YES
|
||||
|
||||
ifneq ($(SKIP_ESCAPING),)
|
||||
%.$(STATIC_O): $(srcdir)/%.cpp
|
||||
$(COMPILE.cc) $(STATICCPPFLAGS) $(STATICCXXFLAGS) -o $@ $<
|
||||
%.o: $(srcdir)/%.cpp
|
||||
$(COMPILE.cc) $(DYNAMICCPPFLAGS) $(DYNAMICCXXFLAGS) -o $@ $<
|
||||
else
|
||||
# convert *.cpp files to _*.cpp with \u / \U escaping
|
||||
CLEANFILES += _*.cpp
|
||||
|
||||
# the actual escaping
|
||||
_%.cpp: $(srcdir)/%.cpp
|
||||
@$(BINDIR)/escapesrc$(EXEEXT) $< $@
|
||||
|
||||
%.$(STATIC_O): _%.cpp
|
||||
$(COMPILE.cc) $(STATICCPPFLAGS) $(STATICCXXFLAGS) -o $@ $<
|
||||
%.o: _%.cpp
|
||||
$(COMPILE.cc) $(DYNAMICCPPFLAGS) $(DYNAMICCXXFLAGS) -o $@ $<
|
||||
endif
|
||||
|
||||
## Dependency rules
|
||||
%.d : %.u
|
||||
|
|
|
@ -109,7 +109,7 @@ static UErrorCode GetEquivalentWindowsLocaleName(const Locale& locale, UnicodeSt
|
|||
// Need it to be UTF-16, not 8-bit
|
||||
// TODO: This seems like a good thing for a helper
|
||||
wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
|
||||
int i;
|
||||
int32_t i;
|
||||
for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++)
|
||||
{
|
||||
if (asciiBCP47Tag[i] == '\0')
|
||||
|
|
|
@ -154,7 +154,7 @@ static UErrorCode GetEquivalentWindowsLocaleName(const Locale& locale, UnicodeSt
|
|||
// Need it to be UTF-16, not 8-bit
|
||||
// TODO: This seems like a good thing for a helper
|
||||
wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
|
||||
int i;
|
||||
int32_t i;
|
||||
for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++)
|
||||
{
|
||||
if (asciiBCP47Tag[i] == '\0')
|
||||
|
|
|
@ -95,6 +95,12 @@ $(TARGET) : $(OBJECTS)
|
|||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
# depends on ICU being built
|
||||
gen-table: tblgen$(EXEEXT)
|
||||
$(INVOKE) ./tblgen$(EXEEXT) > $(srcdir)/cptbl.h
|
||||
|
||||
tblgen$(EXEEXT): tblgen.o
|
||||
$(LINK.cc) $(OUTOPT)$@ $^ $(LIBS) $(LIBICUUC)
|
||||
|
||||
ifeq (,$(MAKECMDGOALS))
|
||||
-include $(DEPS)
|
||||
|
|
521
icu4c/source/tools/escapesrc/cptbl.h
Normal file
521
icu4c/source/tools/escapesrc/cptbl.h
Normal file
|
@ -0,0 +1,521 @@
|
|||
// Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html
|
||||
// generated by tblgen. You weren't going to edit it by hand, were you?
|
||||
|
||||
/*
 * Byte translation table, indexed by a cp1047 (EBCDIC) byte value.
 * Entry i is the low byte of the code unit that tblgen obtained by converting
 * byte i with ICU's "ibm-1047" converter. Laid out eight entries per row; the
 * leading comment on each row is the index of its first entry.
 * The (char) casts are kept so values >= 0x80 are accepted where char is signed.
 */
static const char cp1047_8859_1[256] = {
    /* 00 */ (char)0x00, (char)0x01, (char)0x02, (char)0x03, (char)0x9C, (char)0x09, (char)0x86, (char)0x7F,
    /* 08 */ (char)0x97, (char)0x8D, (char)0x8E, (char)0x0B, (char)0x0C, (char)0x0D, (char)0x0E, (char)0x0F,
    /* 10 */ (char)0x10, (char)0x11, (char)0x12, (char)0x13, (char)0x9D, (char)0x85, (char)0x08, (char)0x87,
    /* 18 */ (char)0x18, (char)0x19, (char)0x92, (char)0x8F, (char)0x1C, (char)0x1D, (char)0x1E, (char)0x1F,
    /* 20 */ (char)0x80, (char)0x81, (char)0x82, (char)0x83, (char)0x84, (char)0x0A, (char)0x17, (char)0x1B,
    /* 28 */ (char)0x88, (char)0x89, (char)0x8A, (char)0x8B, (char)0x8C, (char)0x05, (char)0x06, (char)0x07,
    /* 30 */ (char)0x90, (char)0x91, (char)0x16, (char)0x93, (char)0x94, (char)0x95, (char)0x96, (char)0x04,
    /* 38 */ (char)0x98, (char)0x99, (char)0x9A, (char)0x9B, (char)0x14, (char)0x15, (char)0x9E, (char)0x1A,
    /* 40 */ (char)0x20, (char)0xA0, (char)0xE2, (char)0xE4, (char)0xE0, (char)0xE1, (char)0xE3, (char)0xE5,
    /* 48 */ (char)0xE7, (char)0xF1, (char)0xA2, (char)0x2E, (char)0x3C, (char)0x28, (char)0x2B, (char)0x7C,
    /* 50 */ (char)0x26, (char)0xE9, (char)0xEA, (char)0xEB, (char)0xE8, (char)0xED, (char)0xEE, (char)0xEF,
    /* 58 */ (char)0xEC, (char)0xDF, (char)0x21, (char)0x24, (char)0x2A, (char)0x29, (char)0x3B, (char)0x5E,
    /* 60 */ (char)0x2D, (char)0x2F, (char)0xC2, (char)0xC4, (char)0xC0, (char)0xC1, (char)0xC3, (char)0xC5,
    /* 68 */ (char)0xC7, (char)0xD1, (char)0xA6, (char)0x2C, (char)0x25, (char)0x5F, (char)0x3E, (char)0x3F,
    /* 70 */ (char)0xF8, (char)0xC9, (char)0xCA, (char)0xCB, (char)0xC8, (char)0xCD, (char)0xCE, (char)0xCF,
    /* 78 */ (char)0xCC, (char)0x60, (char)0x3A, (char)0x23, (char)0x40, (char)0x27, (char)0x3D, (char)0x22,
    /* 80 */ (char)0xD8, (char)0x61, (char)0x62, (char)0x63, (char)0x64, (char)0x65, (char)0x66, (char)0x67,
    /* 88 */ (char)0x68, (char)0x69, (char)0xAB, (char)0xBB, (char)0xF0, (char)0xFD, (char)0xFE, (char)0xB1,
    /* 90 */ (char)0xB0, (char)0x6A, (char)0x6B, (char)0x6C, (char)0x6D, (char)0x6E, (char)0x6F, (char)0x70,
    /* 98 */ (char)0x71, (char)0x72, (char)0xAA, (char)0xBA, (char)0xE6, (char)0xB8, (char)0xC6, (char)0xA4,
    /* A0 */ (char)0xB5, (char)0x7E, (char)0x73, (char)0x74, (char)0x75, (char)0x76, (char)0x77, (char)0x78,
    /* A8 */ (char)0x79, (char)0x7A, (char)0xA1, (char)0xBF, (char)0xD0, (char)0x5B, (char)0xDE, (char)0xAE,
    /* B0 */ (char)0xAC, (char)0xA3, (char)0xA5, (char)0xB7, (char)0xA9, (char)0xA7, (char)0xB6, (char)0xBC,
    /* B8 */ (char)0xBD, (char)0xBE, (char)0xDD, (char)0xA8, (char)0xAF, (char)0x5D, (char)0xB4, (char)0xD7,
    /* C0 */ (char)0x7B, (char)0x41, (char)0x42, (char)0x43, (char)0x44, (char)0x45, (char)0x46, (char)0x47,
    /* C8 */ (char)0x48, (char)0x49, (char)0xAD, (char)0xF4, (char)0xF6, (char)0xF2, (char)0xF3, (char)0xF5,
    /* D0 */ (char)0x7D, (char)0x4A, (char)0x4B, (char)0x4C, (char)0x4D, (char)0x4E, (char)0x4F, (char)0x50,
    /* D8 */ (char)0x51, (char)0x52, (char)0xB9, (char)0xFB, (char)0xFC, (char)0xF9, (char)0xFA, (char)0xFF,
    /* E0 */ (char)0x5C, (char)0xF7, (char)0x53, (char)0x54, (char)0x55, (char)0x56, (char)0x57, (char)0x58,
    /* E8 */ (char)0x59, (char)0x5A, (char)0xB2, (char)0xD4, (char)0xD6, (char)0xD2, (char)0xD3, (char)0xD5,
    /* F0 */ (char)0x30, (char)0x31, (char)0x32, (char)0x33, (char)0x34, (char)0x35, (char)0x36, (char)0x37,
    /* F8 */ (char)0x38, (char)0x39, (char)0xB3, (char)0xDB, (char)0xDC, (char)0xD9, (char)0xDA, (char)0x9F,
};
|
||||
|
||||
/*
 * Flag table indexed by code point U+0000..U+00FF.
 * An entry is true when tblgen's UnicodeSet of basic source characters
 * contains that code point; escapesrc skips \u-escaping for such single bytes.
 * Laid out eight entries per row; the leading comment on each row is the code
 * point of its first entry.
 */
static const bool oldIllegal[256] = {
    /* U+0000 */ false, false, false, false, false, false, false, false,
    /* U+0008 */ false, false, false, false, false, false, false, false,
    /* U+0010 */ false, false, false, false, false, false, false, false,
    /* U+0018 */ false, false, false, false, false, false, false, false,
    /* U+0020 */  true,  true,  true,  true, false,  true,  true,  true,
    /* U+0028 */  true,  true,  true,  true,  true,  true,  true,  true,
    /* U+0030 */  true,  true,  true,  true,  true,  true,  true,  true,
    /* U+0038 */  true,  true,  true,  true,  true,  true,  true,  true,
    /* U+0040 */ false,  true,  true,  true,  true,  true,  true,  true,
    /* U+0048 */  true,  true,  true,  true,  true,  true,  true,  true,
    /* U+0050 */  true,  true,  true,  true,  true,  true,  true,  true,
    /* U+0058 */  true,  true,  true,  true, false,  true,  true,  true,
    /* U+0060 */ false,  true,  true,  true,  true,  true,  true,  true,
    /* U+0068 */  true,  true,  true,  true,  true,  true,  true,  true,
    /* U+0070 */  true,  true,  true,  true,  true,  true,  true,  true,
    /* U+0078 */  true,  true,  true,  true,  true,  true,  true, false,
    /* U+0080 */ false, false, false, false, false, false, false, false,
    /* U+0088 */ false, false, false, false, false, false, false, false,
    /* U+0090 */ false, false, false, false, false, false, false, false,
    /* U+0098 */ false, false, false, false, false, false, false, false,
    /* U+00A0 */ false, false, false, false, false, false, false, false,
    /* U+00A8 */ false, false, false, false, false, false, false, false,
    /* U+00B0 */ false, false, false, false, false, false, false, false,
    /* U+00B8 */ false, false, false, false, false, false, false, false,
    /* U+00C0 */ false, false, false, false, false, false, false, false,
    /* U+00C8 */ false, false, false, false, false, false, false, false,
    /* U+00D0 */ false, false, false, false, false, false, false, false,
    /* U+00D8 */ false, false, false, false, false, false, false, false,
    /* U+00E0 */ false, false, false, false, false, false, false, false,
    /* U+00E8 */ false, false, false, false, false, false, false, false,
    /* U+00F0 */ false, false, false, false, false, false, false, false,
    /* U+00F8 */ false, false, false, false, false, false, false, false,
};
|
||||
|
|
@ -27,6 +27,10 @@ static const char
|
|||
kQUOT = 0x27,
|
||||
kDBLQ = 0x22;
|
||||
|
||||
# include "cptbl.h"
|
||||
|
||||
# define cp1047_to_8859(c) cp1047_8859_1[c]
|
||||
|
||||
std::string prog;
|
||||
|
||||
void usage() {
|
||||
|
@ -150,7 +154,7 @@ bool appendUtf8(std::string &outstr,
|
|||
bool fixu8(std::string &linestr, size_t origpos, size_t &endpos) {
|
||||
size_t pos = origpos + 3;
|
||||
std::string outstr;
|
||||
outstr += (kDBLQ);
|
||||
outstr += '\"'; // local encoding
|
||||
for(;pos<endpos;pos++) {
|
||||
char c = linestr[pos];
|
||||
if(c == kBKSLASH) {
|
||||
|
@ -171,7 +175,7 @@ bool fixu8(std::string &linestr, size_t origpos, size_t &endpos) {
|
|||
appendByte(outstr, c);
|
||||
}
|
||||
}
|
||||
outstr += (kDBLQ);
|
||||
outstr += ('\"');
|
||||
|
||||
linestr.replace(origpos, (endpos-origpos+1), outstr);
|
||||
|
||||
|
@ -231,19 +235,36 @@ bool fixAt(std::string &linestr, size_t pos) {
|
|||
if(linestr[pos] == '\\') continue;
|
||||
// some other escape… ignore
|
||||
} else {
|
||||
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
|
||||
// mogrify 1-4 bytes from 1047 'back' to utf-8
|
||||
char old_byte = linestr[pos];
|
||||
linestr[pos] = cp1047_to_8859(linestr[pos]);
|
||||
// how many more?
|
||||
int32_t trail = U8_COUNT_TRAIL_BYTES(linestr[pos]);
|
||||
for(size_t pos2 = pos+1; trail>0; pos++,trail--) {
|
||||
linestr[pos2] = cp1047_to_8859(linestr[pos2]);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Proceed to decode utf-8
|
||||
const uint8_t *s = (const uint8_t*) (linestr.c_str());
|
||||
int32_t i = pos;
|
||||
int32_t length = linestr.size();
|
||||
UChar32 c;
|
||||
if(U8_IS_SINGLE((uint8_t)s[i]) && oldIllegal[s[i]]) {
|
||||
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
|
||||
linestr[pos] = old_byte; // put it back
|
||||
#endif
|
||||
continue; // single code point not previously legal for \u escaping
|
||||
}
|
||||
|
||||
if(U8_IS_SINGLE((uint8_t)s[i])) continue; // single code point
|
||||
|
||||
// otherwise, convert it to \u / \U
|
||||
{
|
||||
U8_NEXT(s, i, length, c);
|
||||
}
|
||||
if(c<0) {
|
||||
fprintf(stderr, "Illegal utf-8 sequence\n");
|
||||
fprintf(stderr, "Line: >>%s<<\n", linestr.c_str());
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
80
icu4c/source/tools/escapesrc/tblgen.cpp
Normal file
80
icu4c/source/tools/escapesrc/tblgen.cpp
Normal file
|
@ -0,0 +1,80 @@
|
|||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include <stdio.h>
|
||||
|
||||
static const char *kConverter = "ibm-1047";
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
printf("// %s\n", U_COPYRIGHT_STRING);
|
||||
printf("// generated by tblgen. You weren't going to edit it by hand, were you?\n");
|
||||
printf("\n");
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
LocalUConverterPointer cnv(ucnv_open(kConverter, &status));
|
||||
|
||||
if(U_FAILURE(status)) {
|
||||
fprintf(stderr, "Failed to open %s: %s\n", kConverter, u_errorName(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("static const char cp1047_8859_1[256] = { \n");
|
||||
for(int i=0x00; i<0x100; i++) {
|
||||
char cp1047[1];
|
||||
cp1047[0] = i;
|
||||
UChar u[1];
|
||||
UChar *target = u;
|
||||
const char *source = cp1047;
|
||||
ucnv_toUnicode(cnv.getAlias(), &target, u+1, &source, cp1047+1, nullptr, true, &status);
|
||||
if(U_FAILURE(status)) {
|
||||
fprintf(stderr, "Conversion failure at #%X: %s\n", i, u_errorName(status));
|
||||
return 2;
|
||||
}
|
||||
printf(" (char)0x%02X, /* %02X */\n", u[0], i);
|
||||
}
|
||||
printf("};\n\n");
|
||||
|
||||
//
|
||||
// UnicodeSet oldIllegal("[:print:]", status); // [a-zA-Z0-9_}{#)(><%:;.?*+-/^&|~!=,\\u005b\\u005d\\u005c]", status);
|
||||
UnicodeSet oldIllegal("[0-9 a-z A-Z "
|
||||
"_ \\{ \\} \\[ \\] # \\( \\) < > % \\: ; . "
|
||||
"? * + \\- / \\^ \\& | ~ ! = , \\ \" ' ]", status);
|
||||
|
||||
/*
|
||||
|
||||
http://www.lirmm.fr/~ducour/Doc-objets/ISO+IEC+14882-1998.pdf ( note: 1998 ) page 10, section 2.2 says:
|
||||
|
||||
1 The basic source character set consists of 96 characters: the space character, the control characters repre- 15)
|
||||
senting horizontal tab, vertical tab, form feed, and new-line, plus the following 91 graphical characters:
|
||||
a b c d e f g h i j k l m n opqrstuvwxyz
|
||||
A B C D E F G H I J K L M N OPQRSTUVWXYZ
|
||||
0 12 3 4 5 6 7 8 9
|
||||
_ { } [ ] # ( ) < > % : ; . ?*+-/^&|~!=,\"
|
||||
2 The universal-character-name construct provides a way to name other characters. hex-quad:
|
||||
hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit
|
||||
universal-character-name: \u hex-quad
|
||||
\U hex-quad hex-quad
|
||||
The character designated by the universal-character-name \UNNNNNNNN is that character whose character short name in ISO/IEC 10646 is NNNNNNNN; the character designated by the universal-character-name \uNNNN is that character whose character short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value for a universal character name is less than 0x20 or in the range 0x7F-0x9F (inclusive), or if the uni- versal character name designates a character in the basic source character set, then the program is ill- formed.
|
||||
|
||||
|
||||
So basically: printable ASCII plus 0x00-0x1F, 0x7F-0x9F, was all illegal.
|
||||
|
||||
Some discussion at http://unicode.org/mail-arch/unicode-ml/y2003-m10/0471.html
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
printf("static const bool oldIllegal[256] = { \n");
|
||||
for(UChar i=0x00; i<0x100;i++) {
|
||||
printf(" %s, /* U+%04X */\n",
|
||||
(oldIllegal.contains(i))?" true":"false",
|
||||
i);
|
||||
}
|
||||
printf("};\n\n");
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Add table
Reference in a new issue