ICU-4639 Add Big5, use sorted mbcs statistics, increase coverage.

X-SVN-Rev: 19152
This commit is contained in:
Eric Mader 2006-02-13 20:47:36 +00:00
parent 982f76f213
commit ba21b7880e
5 changed files with 162 additions and 68 deletions

View file

@ -97,6 +97,7 @@ void CharsetDetector::setRecognizers()
new CharsetRecog_gb_18030(),
new CharsetRecog_euc_jp(),
new CharsetRecog_euc_kr(),
new CharsetRecog_big5(),
new CharsetRecog_2022JP(),
new CharsetRecog_2022KR(),

View file

@ -21,70 +21,75 @@ const int32_t commonChars_sjis [] = {
// TODO: This set of data comes from the character frequency-
// of-occurrence analysis tool. The data needs to be moved
// into a resource and loaded from there.
0x82cc, 0x82c5, 0x82a2, 0x815b, 0x8141, 0x82b5, 0x82c9, 0x82b7, 0x8142, 0x82c4,
0x82c6, 0x82cd, 0x82dc, 0x82f0, 0x82e9, 0x82c8, 0x82aa, 0x8393, 0x82bd, 0x8358,
0x82ea, 0x8343, 0x82a4, 0x82a9, 0x8367, 0x82b1, 0x8145, 0x82e0, 0x838b, 0x834e,
0x82e8, 0x82e7, 0x8140, 0x8362, 0x8389, 0x82c1, 0x838a, 0x82ab, 0x8376, 0x82b3,
0x82f1, 0x82a0, 0x8368, 0x93fa, 0x8175, 0x8176, 0x835e, 0x82e6, 0x8357, 0x82ad,
0x8381, 0x82a6, 0x82b9, 0x82bb, 0x82be, 0x8341, 0x8374, 0x82af, 0x9056, 0x82a8,
0x82c2, 0x8354, 0x8e67, 0x8375, 0x82c7, 0x95f1, 0x8356, 0x967b, 0x92e8, 0x8345,
0x82ce, 0x8385, 0x9770, 0x82df, 0x82dd, 0x836f, 0x8342, 0x8ca7, 0x8352, 0x837d,
0x838d, 0x8346, 0x834f, 0x8380, 0x82ed, 0x8d73, 0x8349, 0x8365, 0x8fee, 0x95b6,
0x8169, 0x816a, 0x836a, 0x8dec, 0x82bf, 0x834c, 0x8366, 0x82e2, 0x838c, 0x945c};
0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0,
0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5,
0x82b7, 0x82bd, 0x82be, 0x82c1, 0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc,
0x82cd, 0x82dc, 0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341,
0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375, 0x8376, 0x8389,
0x838a, 0x838b, 0x838d, 0x8393, 0x8e96, 0x93fa, 0x95aa};
const int32_t commonChars_euc_jp[] =
const int32_t commonChars_euc_jp[] = {
// TODO: This set of data comes from the character frequency-
// of-occurrence analysis tool. The data needs to be moved
// into a resource and loaded from there.
{0xa4ce, 0xa4c7, 0xa4a4, 0xa1bc, 0xa1a2, 0xa4b7, 0xa4cb, 0xa4b9, 0xa1a3, 0xa4c6,
0xa4c8, 0xa4cf, 0xa4de, 0xa4f2, 0xa4eb, 0xa4ca, 0xa4ac, 0xa5f3, 0xa4bf, 0xa5b9,
0xa4ec, 0xa5a4, 0xa4a6, 0xa4ab, 0xa5c8, 0xa4b3, 0xa1a6, 0xa4e2, 0xa5eb, 0xa5af,
0xa4ea, 0xa4e9, 0xa1a1, 0xa5c3, 0xa5e9, 0xa4c3, 0xa5ea, 0xa4ad, 0xa5d7, 0xa4b5,
0xa4f3, 0xa4a2, 0xa5c9, 0xc6fc, 0xa1d6, 0xa1d7, 0xa5bf, 0xa4e8, 0xa5b8, 0xa4af,
0xa5e1, 0xa4a8, 0xa4bb, 0xa4bd, 0xa4c0, 0xa5a2, 0xa5d5, 0xa4b1, 0xbfb7, 0xa4aa,
0xa4c4, 0xa5b5, 0xbbc8, 0xa5d6, 0xa4c9, 0xcaf3, 0xa5b7, 0xcbdc, 0xc4ea, 0xa5a6,
0xa4d0, 0xa5e5, 0xcdd1, 0xa4e1, 0xa4df, 0xa5d0, 0xa5a3, 0xb8a9, 0xa5b3, 0xa5de,
0xa5ed, 0xa5a7, 0xa5b0, 0xa5e0, 0xa4ef, 0xb9d4, 0xa5aa, 0xa5c6, 0xbef0, 0xcab8,
0xa1ca, 0xa1cb, 0xa5cb, 0xbaee, 0xa4c1, 0xa5ad, 0xa5c7, 0xa4e4, 0xa5ec, 0xc7bd};
0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2,
0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3,
0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4,
0xa4c6, 0xa4c7, 0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de,
0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ef,
0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6, 0xa5a7, 0xa5aa, 0xa5ad, 0xa5af,
0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7,
0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1,
0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee,
0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1};
const int32_t commonChars_euc_kr[] =
const int32_t commonChars_euc_kr[] = {
// TODO: This set of data comes from the character frequency-
// of-occurrence analysis tool. The data needs to be moved
// into a resource and loaded from there.
{0xc0cc, 0xb4d9, 0xb4c2, 0xc0c7, 0xbfa1, 0xc7cf, 0xb0a1, 0xb0ed, 0xc7d1, 0xc1f6,
0xc0bb, 0xb7ce, 0xb1e2, 0xbcad, 0xc0ba, 0xbbe7, 0xc1a4, 0xc0da, 0xb5b5, 0xb8a6,
0xbeee, 0xb4cf, 0xbcf6, 0xbdc3, 0xb1d7, 0xb4eb, 0xb8ae, 0xc0ce, 0xb3aa, 0xbec6,
0xc0d6, 0xbab8, 0xb5e9, 0xb6f3, 0xc7d8, 0xb0cd, 0xc0cf, 0xbdba, 0xc0b8, 0xb1b9,
0xc1a6, 0xb9fd, 0xbbf3, 0xb0d4, 0xb8e9, 0xb8b8, 0xb0fa, 0xc0fb, 0xbace, 0xc1d6,
0xbfa9, 0xc0fc, 0xbfeb, 0xb9ae, 0xc6ae, 0xbbfd, 0xbcba, 0xc0a7, 0xbff8, 0xb5c7,
0xbfe4, 0xbfec, 0xbdc5, 0xc7d2, 0xc7e5, 0xb0fc, 0xb1b8, 0xbaf1, 0xbedf, 0xc5cd,
0xb8b6, 0xbdc0, 0xb7af, 0xb5bf, 0xb3bb, 0xc8ad, 0xc0bd, 0xb0b3, 0xc4a1, 0xb7c2,
0xb9ab, 0xc0af, 0xbef8, 0xb5a5, 0xbcd2, 0xb9ce, 0xc1df, 0xbfc0, 0xc1f8, 0xb0e6,
0xb1c7, 0xbad0, 0xbefa, 0xc0e5, 0xbec8, 0xc1b6, 0xb8bb, 0xb0f8, 0xb9cc, 0xb0c5};
0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc,
0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9,
0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf, 0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce,
0xb8a6, 0xb8ae, 0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce,
0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd, 0xbcad, 0xbcba,
0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5, 0xbec6, 0xbec8, 0xbedf, 0xbeee,
0xbef8, 0xbefa, 0xbfa1, 0xbfa9, 0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7,
0xc0af, 0xc0b8, 0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6,
0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6, 0xc1df, 0xc1f6,
0xc1f8, 0xc4a1, 0xc5cd, 0xc6ae, 0xc7cf, 0xc7d1, 0xc7d2, 0xc7d8, 0xc7e5, 0xc8ad};
const int32_t commonChars_gb_18030[] =
const int32_t commonChars_big5[] = {
// TODO: This set of data comes from the character frequency-
// of-occurrence analysis tool. The data needs to be moved
// into a resource and loaded from there.
{0xa3ac, 0xb5c4, 0xa1a1, 0xa1a4, 0xa1a3, 0xcac7, 0xd2bb, 0xb4f3, 0xd4da, 0xd6d0,
0xcafd, 0xd3d0, 0xa1f3, 0xb2bb, 0xa3ba, 0xbbfa, 0xc8cb, 0xa1a2, 0xd3c3, 0xd1a7,
0xc8d5, 0xbedd, 0xb8f6, 0xd0c2, 0xcdf8, 0xd2aa, 0xb9fa, 0xc1cb, 0xc9cf, 0xa1b0,
0xa1b1, 0xced2, 0xbcfe, 0xcec4, 0xd2d4, 0xc4dc, 0xc0b4, 0xd4c2, 0xcab1, 0xd0d0,
0xbdcc, 0xbfc9, 0xb6d4, 0xbcdb, 0xb1be, 0xb3f6, 0xb8b4, 0xc9fa, 0xb1b8, 0xbcbc,
0xcfc2, 0xbacd, 0xbecd, 0xb3c9, 0xd5e2, 0xb8df, 0xb7d6, 0xc5cc, 0xbfc6, 0xbbe1,
0xceaa, 0xc8e7, 0xcfb5, 0xa1f1, 0xc4ea, 0xb1a8, 0xb6af, 0xc0ed, 0xd3fd, 0xb7a2,
0xc8ab, 0xb7bd, 0xcee5, 0xc2db, 0xbba7, 0xd0d4, 0xb9c9, 0xc3c7, 0xb9fd, 0xcad0,
0xb5e3, 0xbbd6, 0xcfd6, 0xcab5, 0xd2b2, 0xbfb4, 0xb6e0, 0xccec, 0xc7f8, 0xd0c5,
0xcad6, 0xb9d8, 0xb5bd, 0xb7dd, 0xc6f7, 0xcaf5, 0xa3a1, 0xb7a8, 0xb9ab, 0xd2b5,
0xcbf9, 0xcdbc, 0xc6e4, 0xd3da, 0xd0a1, 0xd1a1, 0xd3ce, 0xbfaa, 0xb4e6, 0xc4bf,
0xd7f7, 0xb5e7, 0xcdb3, 0xc7e9, 0xd7ee, 0xc6c0, 0xcfdf, 0xb5d8, 0xb5c0, 0xbead,
0xb4c5, 0xc6b7, 0xc4da, 0xd0c4, 0xb9a4, 0xd4aa, 0xc2bc, 0xc3c0, 0xbaf3, 0xcabd,
0xbcd2, 0xcef1, 0xbdab, 0xa3ad, 0xa3bf, 0xb3a4, 0xb9fb, 0xd6ae, 0xc1bf, 0xbbd8,
0xb8f1, 0xb6f8, 0xb6a8, 0xcde2, 0xbac3, 0xb3cc, 0xccd8, 0xd7d4, 0xcbb5};
0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446,
0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3,
0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce, 0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548,
0xa558, 0xa569, 0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8,
0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751, 0xa759, 0xa7da,
0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4, 0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3,
0xaa6b, 0xaaba, 0xaabe, 0xaacc, 0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59,
0xaec9, 0xafe0, 0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c,
0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944, 0xbad8, 0xbb44,
0xbba1, 0xbdd1, 0xc2c4, 0xc3b9, 0xc440, 0xc45f};
const int32_t commonChars_gb_18030[] = {
// TODO: This set of data comes from the character frequency-
// of-occurrence analysis tool. The data needs to be moved
// into a resource and loaded from there.
0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac,
0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4,
0xb5e3, 0xb6af, 0xb6d4, 0xb6e0, 0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4,
0xb8df, 0xb8f6, 0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6,
0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd, 0xbfb4, 0xbfc6,
0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7, 0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7,
0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5, 0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7,
0xcad0, 0xcad6, 0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5,
0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7, 0xd2aa, 0xd2b2,
0xd2b5, 0xd2bb, 0xd2d4, 0xd3c3, 0xd3d0, 0xd3fd, 0xd4c2, 0xd4da, 0xd5e2, 0xd6d0};
static int32_t binarySearch(const int32_t *array, int32_t len, int32_t value)
{
#if 0
int32_t start = 0, end = len-1;
int32_t mid = (start+end)/2;
@ -101,14 +106,6 @@ static int32_t binarySearch(const int32_t *array, int32_t len, int32_t value)
mid = (start+end)/2;
}
#else
// um... the commonChars arrays aren't sorted...
for(int32_t i = 0; i < len; i += 1) {
if(array[i] == value) {
return i;
}
}
#endif
return -1;
}
@ -273,6 +270,11 @@ const char *CharsetRecog_sjis::getName() const
return "Shift_JIS";
}
/**
 * Report the language associated with text recognized as Shift_JIS.
 *
 * @return "ja", the ISO 639 language code for Japanese. ("jp" is a
 *         country-style code, not a language code.)
 */
const char *CharsetRecog_sjis::getLanguage() const
{
    return "ja";
}
CharsetRecog_euc::~CharsetRecog_euc()
{
// nothing to do
@ -351,6 +353,11 @@ const char *CharsetRecog_euc_jp::getName() const
return "EUC-JP";
}
/**
 * Report the language associated with text recognized as EUC-JP.
 *
 * @return "ja", the ISO 639 language code for Japanese. ("jp" is a
 *         country-style code, not a language code.)
 */
const char *CharsetRecog_euc_jp::getLanguage() const
{
    return "ja";
}
int32_t CharsetRecog_euc_jp::match(InputText *det)
{
return match_mbcs(det, commonChars_euc_jp, ARRAY_SIZE(commonChars_euc_jp));
@ -366,11 +373,70 @@ const char *CharsetRecog_euc_kr::getName() const
return "EUC-KR";
}
/**
 * Report the language associated with text recognized as EUC-KR.
 *
 * @return "ko", the ISO 639 language code for Korean. ("kr" is a
 *         country-style code, not a language code.)
 */
const char *CharsetRecog_euc_kr::getLanguage() const
{
    return "ko";
}
/**
 * Score the input as EUC-KR by handing it to match_mbcs together with the
 * commonChars_euc_kr table.
 *
 * @param det the input text to examine.
 * @return whatever match_mbcs returns for this input and table.
 */
int32_t CharsetRecog_euc_kr::match(InputText *det)
{
    const int32_t commonCharCount = ARRAY_SIZE(commonChars_euc_kr);

    return match_mbcs(det, commonChars_euc_kr, commonCharCount);
}
/** Destructor; this recognizer has no cleanup of its own to perform. */
CharsetRecog_big5::~CharsetRecog_big5()
{
}
/**
 * Read the next Big5 character from the input into the iterator.
 *
 * On entry the iterator's index is advanced to nextIndex and its error flag
 * is cleared. A lead byte of 0x7F or below, or 0xFF, is returned as a
 * single-byte character; any other lead byte is combined with the following
 * byte into a 16-bit value (lead << 8 | trail).
 *
 * @param it  iterator state; index, error and charValue are updated.
 * @param det the input being scanned.
 * @return FALSE when nextByte() yields a negative value for either the lead
 *         or the trail byte (presumably end of input - confirm against
 *         iteratedChar::nextByte); TRUE otherwise. A trail byte below 0x40,
 *         or equal to 0x7F or 0xFF, sets it->error but still returns TRUE.
 */
UBool CharsetRecog_big5::nextChar(iteratedChar* it, InputText* det)
{
    it->index = it->nextIndex;
    it->error = FALSE;

    it->charValue = it->nextByte(det);
    const int32_t leadByte = it->charValue;

    if (leadByte < 0) {
        return FALSE;
    }

    const UBool isSingleByte = (leadByte <= 0x7F || leadByte == 0xFF);

    if (isSingleByte) {
        return TRUE;
    }

    const int32_t trailByte = it->nextByte(det);

    if (trailByte < 0) {
        return FALSE;
    }

    it->charValue = (it->charValue << 8) | trailByte;

    // Trail bytes below 0x40, and the values 0x7F and 0xFF, are rejected.
    switch (trailByte) {
    case 0x7F:
    case 0xFF:
        it->error = TRUE;
        break;

    default:
        if (trailByte < 0x40) {
            it->error = TRUE;
        }
        break;
    }

    return TRUE;
}
/**
 * @return the charset name reported by this recognizer, "Big5".
 */
const char *CharsetRecog_big5::getName() const
{
    const char *const charsetName = "Big5";

    return charsetName;
}
/**
 * @return the language code reported by this recognizer, "zh".
 */
const char *CharsetRecog_big5::getLanguage() const
{
    const char *const languageCode = "zh";

    return languageCode;
}
/**
 * Score the input as Big5 by handing it to match_mbcs together with the
 * commonChars_big5 table.
 *
 * @param det the input text to examine.
 * @return whatever match_mbcs returns for this input and table.
 */
int32_t CharsetRecog_big5::match(InputText *det)
{
    const int32_t commonCharCount = ARRAY_SIZE(commonChars_big5);

    return match_mbcs(det, commonChars_big5, commonCharCount);
}
CharsetRecog_gb_18030::~CharsetRecog_gb_18030()
{
// nothing to do
@ -435,6 +501,11 @@ const char *CharsetRecog_gb_18030::getName() const
return "GB18030";
}
/**
 * @return the language code reported by this recognizer, "zh".
 */
const char *CharsetRecog_gb_18030::getLanguage() const
{
    const char *const languageCode = "zh";

    return languageCode;
}
int32_t CharsetRecog_gb_18030::match(InputText *det)
{
return match_mbcs(det, commonChars_gb_18030, ARRAY_SIZE(commonChars_gb_18030));

View file

@ -67,7 +67,8 @@ public:
* @return the charset name.
*/
const char *getName() const = 0 ;
const char *getName() const = 0;
const char *getLanguage() const = 0;
int32_t match(InputText* det) = 0;
/**
@ -100,6 +101,7 @@ public:
int32_t match(InputText *det);
const char *getName() const;
const char *getLanguage() const;
};
@ -115,7 +117,8 @@ class CharsetRecog_euc : public CharsetRecog_mbcs
public:
virtual ~CharsetRecog_euc();
const char* getName() const = 0;
const char *getName() const = 0;
const char *getLanguage() const = 0;
int32_t match(InputText* det) = 0;
/*
@ -137,6 +140,7 @@ public:
virtual ~CharsetRecog_euc_jp();
const char *getName() const;
const char *getLanguage() const;
int32_t match(InputText *det);
};
@ -151,6 +155,25 @@ public:
virtual ~CharsetRecog_euc_kr();
const char *getName() const;
const char *getLanguage() const;
int32_t match(InputText *det);
};
/**
*
* Big5 charset recognizer.
*
* Derives from CharsetRecog_mbcs and supplies the Big5-specific character
* iteration (nextChar) plus the name, language and match entry points.
*
*/
class CharsetRecog_big5 : public CharsetRecog_mbcs
{
public:
virtual ~CharsetRecog_big5();
// Reads the next one- or two-byte Big5 character from det into it.
UBool nextChar(iteratedChar* it, InputText* det);
// Returns the charset name, "Big5".
const char *getName() const;
// Returns the language code, "zh".
const char *getLanguage() const;
// Scores det via match_mbcs using the commonChars_big5 table.
int32_t match(InputText *det);
};
@ -169,6 +192,7 @@ public:
UBool nextChar(iteratedChar* it, InputText* det);
const char *getName() const;
const char *getLanguage() const;
int32_t match(InputText *det);
};

View file

@ -142,15 +142,16 @@ void CharsetDetectionTest::checkEncoding(const UnicodeString &testString, const
ucsdet_setText(csd, bytes, byteLength, &status);
const UCharsetMatch *csm = ucsdet_detect(csd, &status);
int32_t matchCount = 0;
const UCharsetMatch **matches = ucsdet_detectAll(csd, &matchCount, &status);
UnicodeString name(ucsdet_getName(csm, &status));
UnicodeString lang(ucsdet_getLanguage(csm, &status));
UnicodeString name(ucsdet_getName(matches[0], &status));
UnicodeString lang(ucsdet_getLanguage(matches[0], &status));
UChar *decoded = NULL;
int32_t dLength = 0;
if (csm == NULL) {
if (matchCount == 0) {
errln("Encoding detection failure for " + id + ": expected " + eSplit[0] + ", got no matches");
goto bail;
}
@ -159,9 +160,6 @@ void CharsetDetectionTest::checkEncoding(const UnicodeString &testString, const
errln("Encoding detection failure for " + id + ": expected " + eSplit[0] + ", got " + name);
#ifdef DEBUG_DETECT
int32_t matchCount;
const UCharsetMatch **matches = ucsdet_detectAll(csd, &matchCount, &status);
for (int32_t m = 0; m < matchCount; m += 1) {
const char *name = ucsdet_getName(matches[m], &status);
const char *lang = ucsdet_getLanguage(matches[m], &status);
@ -179,7 +177,7 @@ void CharsetDetectionTest::checkEncoding(const UnicodeString &testString, const
}
decoded = NEW_ARRAY(UChar, testLength);
dLength = ucsdet_getUChars(csm, decoded, testLength, &status);
dLength = ucsdet_getUChars(matches[0], decoded, testLength, &status);
if (testString.compare(decoded, dLength) != 0) {
errln("Round-trip error for " + id + ", " + eSplit[0] + ": getUChars() didn't yeild the original string.");

View file

@ -376,7 +376,7 @@
</test-case>
<test-case id="IUC10-zh-Hant" encodings="UTF-8 UTF-32BE UTF-32LE ISO-2022-CN GB18030">
<test-case id="IUC10-zh-Hant" encodings="UTF-8 UTF-32BE UTF-32LE ISO-2022-CN GB18030 Big5">
<!-- Copyright © 1991-2005 Unicode, Inc. All rights reserved. -->
歐洲,軟體及網際網路: