mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-9538 add script metadata properties API
X-SVN-Rev: 33255
This commit is contained in:
parent
b1309f6306
commit
ebbc5423ef
10 changed files with 523 additions and 8 deletions
|
@ -1,6 +1,6 @@
|
|||
#******************************************************************************
|
||||
#
|
||||
# Copyright (C) 1999-2012, International Business Machines
|
||||
# Copyright (C) 1999-2013, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
#******************************************************************************
|
||||
|
@ -97,7 +97,7 @@ unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase
|
|||
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o unorm_it.o \
|
||||
chariter.o schriter.o uchriter.o uiter.o \
|
||||
patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
|
||||
uscript.o usc_impl.o unames.o \
|
||||
uscript.o uscript_props.o usc_impl.o unames.o \
|
||||
utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
|
||||
uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o \
|
||||
rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \
|
||||
|
|
|
@ -389,6 +389,7 @@
|
|||
<ClCompile Include="uprops.cpp" />
|
||||
<ClCompile Include="usc_impl.c" />
|
||||
<ClCompile Include="uscript.c" />
|
||||
<ClCompile Include="uscript_props.cpp" />
|
||||
<ClCompile Include="uset.cpp" />
|
||||
<ClCompile Include="uset_props.cpp" />
|
||||
<ClCompile Include="usetiter.cpp" />
|
||||
|
|
|
@ -406,6 +406,9 @@
|
|||
<ClCompile Include="uscript.c">
|
||||
<Filter>properties & sets</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="uscript_props.cpp">
|
||||
<Filter>properties & sets</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="uset.cpp">
|
||||
<Filter>properties & sets</Filter>
|
||||
</ClCompile>
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1997-2012, International Business Machines
|
||||
* Copyright (C) 1997-2013, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
|
@ -512,4 +512,111 @@ uscript_getScriptExtensions(UChar32 c,
|
|||
UErrorCode *errorCode);
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Script usage constants.
|
||||
* See UAX #31 Unicode Identifier and Pattern Syntax.
|
||||
* http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
|
||||
*
|
||||
* @draft ICU 51
|
||||
*/
|
||||
typedef enum UScriptUsage {
    /* The numeric values are spelled out because they are part of the API contract. */

    /** The script is not encoded in Unicode. @draft ICU 51 */
    USCRIPT_USAGE_NOT_ENCODED = 0,
    /** The script usage is unknown. @draft ICU 51 */
    USCRIPT_USAGE_UNKNOWN = 1,
    /** The script is a Candidate for Exclusion from Identifiers. @draft ICU 51 */
    USCRIPT_USAGE_EXCLUDED = 2,
    /** The script is a Limited Use script. @draft ICU 51 */
    USCRIPT_USAGE_LIMITED_USE = 3,
    /** The script is an Aspirational Use script. @draft ICU 51 */
    USCRIPT_USAGE_ASPIRATIONAL = 4,
    /** The script is a Recommended script. @draft ICU 51 */
    USCRIPT_USAGE_RECOMMENDED = 5
} UScriptUsage;
|
||||
|
||||
/**
|
||||
* Writes the script sample character string.
|
||||
* This string normally consists of one code point but might be longer.
|
||||
* The string is empty if the script is not encoded.
|
||||
*
|
||||
* @param script script code
|
||||
* @param dest output string array
|
||||
* @param capacity number of UChars in the dest array
|
||||
* @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input
|
||||
* @return the string length, even if U_BUFFER_OVERFLOW_ERROR
|
||||
* @draft ICU 51
|
||||
*/
|
||||
U_DRAFT int32_t U_EXPORT2
|
||||
uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
class UnicodeString;
|
||||
U_NAMESPACE_END
|
||||
|
||||
/**
|
||||
* Returns the script sample character string.
|
||||
* This string normally consists of one code point but might be longer.
|
||||
* The string is empty if the script is not encoded.
|
||||
*
|
||||
* @param script script code
|
||||
* @return the sample character string
|
||||
* @draft ICU 51
|
||||
*/
|
||||
U_COMMON_API icu::UnicodeString U_EXPORT2
|
||||
uscript_getSampleUnicodeString(UScriptCode script);
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
|
||||
* Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.
|
||||
*
|
||||
* @param script script code
|
||||
* @return script usage
|
||||
* @see UScriptUsage
|
||||
* @draft ICU 51
|
||||
*/
|
||||
U_DRAFT UScriptUsage U_EXPORT2
|
||||
uscript_getUsage(UScriptCode script);
|
||||
|
||||
/**
|
||||
* Returns TRUE if the script is written right-to-left.
|
||||
* For example, Arab and Hebr.
|
||||
*
|
||||
* @param script script code
|
||||
* @return TRUE if the script is right-to-left
|
||||
* @draft ICU 51
|
||||
*/
|
||||
U_DRAFT UBool U_EXPORT2
|
||||
uscript_isRightToLeft(UScriptCode script);
|
||||
|
||||
/**
|
||||
* Returns TRUE if the script allows line breaks between letters (excluding hyphenation).
|
||||
* Such a script typically requires dictionary-based line breaking.
|
||||
* For example, Hani and Thai.
|
||||
*
|
||||
* @param script script code
|
||||
* @return TRUE if the script allows line breaks between letters
|
||||
* @draft ICU 51
|
||||
*/
|
||||
U_DRAFT UBool U_EXPORT2
|
||||
uscript_breaksBetweenLetters(UScriptCode script);
|
||||
|
||||
/**
|
||||
* Returns TRUE if case distinctions are customary in modern (or most recent) usage of the script.
|
||||
* For example, Latn and Cyrl.
|
||||
*
|
||||
* @param script script code
|
||||
* @return TRUE if the script is cased
|
||||
* @draft ICU 51
|
||||
*/
|
||||
U_DRAFT UBool U_EXPORT2
|
||||
uscript_isCased(UScriptCode script);
|
||||
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
|
||||
#endif
|
||||
|
|
267
icu4c/source/common/uscript_props.cpp
Normal file
267
icu4c/source/common/uscript_props.cpp
Normal file
|
@ -0,0 +1,267 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2013, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: uscript_props.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2013feb16
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
namespace {
|
||||
|
||||
// Script metadata (script properties).
|
||||
// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
|
||||
|
||||
// Layout of one int32_t script properties word:
//   Bits 20.. 0: sample character (code point)
//   Bits 23..21: usage
//   Bits 31..24: single-bit flags
// A word of 0 means NOT_ENCODED: no sample character, and all flags are false.

// Usage values, already positioned in bits 23..21.
const int32_t UNKNOWN = 0x200000;       // 1 << 21
const int32_t EXCLUSION = 0x400000;     // 2 << 21
const int32_t LIMITED_USE = 0x600000;   // 3 << 21
const int32_t ASPIRATIONAL = 0x800000;  // 4 << 21
const int32_t RECOMMENDED = 0xA00000;   // 5 << 21

// Single-bit flags, starting at bit 24.
const int32_t RTL = 0x1000000;          // 1 << 24
const int32_t LB_LETTERS = 0x2000000;   // 1 << 25
const int32_t CASED = 0x4000000;        // 1 << 26
|
||||
|
||||
const int32_t SCRIPT_PROPS[] = {
|
||||
// Begin copy-paste output from
|
||||
// icu/tools/trunk/unicode/py/parsescriptmetadata.py
|
||||
0x0040 | UNKNOWN, // Zyyy
|
||||
0x0308 | UNKNOWN, // Zinh
|
||||
0x0628 | RECOMMENDED | RTL, // Arab
|
||||
0x0531 | RECOMMENDED | CASED, // Armn
|
||||
0x0995 | RECOMMENDED, // Beng
|
||||
0x3105 | RECOMMENDED | LB_LETTERS, // Bopo
|
||||
0x13C4 | LIMITED_USE, // Cher
|
||||
0x03E2 | EXCLUSION | CASED, // Copt
|
||||
0x042F | RECOMMENDED | CASED, // Cyrl
|
||||
0x10414 | EXCLUSION | CASED, // Dsrt
|
||||
0x0905 | RECOMMENDED, // Deva
|
||||
0x12A0 | RECOMMENDED, // Ethi
|
||||
0x10D3 | RECOMMENDED, // Geor
|
||||
0x10330 | EXCLUSION, // Goth
|
||||
0x03A9 | RECOMMENDED | CASED, // Grek
|
||||
0x0A95 | RECOMMENDED, // Gujr
|
||||
0x0A15 | RECOMMENDED, // Guru
|
||||
0x5B57 | RECOMMENDED | LB_LETTERS, // Hani
|
||||
0xAC00 | RECOMMENDED, // Hang
|
||||
0x05D0 | RECOMMENDED | RTL, // Hebr
|
||||
0x304B | RECOMMENDED | LB_LETTERS, // Hira
|
||||
0x0C95 | RECOMMENDED, // Knda
|
||||
0x30AB | RECOMMENDED | LB_LETTERS, // Kana
|
||||
0x1780 | RECOMMENDED | LB_LETTERS, // Khmr
|
||||
0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo
|
||||
0x004C | RECOMMENDED | CASED, // Latn
|
||||
0x0D15 | RECOMMENDED, // Mlym
|
||||
0x1826 | ASPIRATIONAL, // Mong
|
||||
0x1000 | RECOMMENDED | LB_LETTERS, // Mymr
|
||||
0x168F | EXCLUSION, // Ogam
|
||||
0x10300 | EXCLUSION, // Ital
|
||||
0x0B15 | RECOMMENDED, // Orya
|
||||
0x16A0 | EXCLUSION, // Runr
|
||||
0x0D85 | RECOMMENDED, // Sinh
|
||||
0x0710 | LIMITED_USE | RTL, // Syrc
|
||||
0x0B95 | RECOMMENDED, // Taml
|
||||
0x0C15 | RECOMMENDED, // Telu
|
||||
0x078C | RECOMMENDED | RTL, // Thaa
|
||||
0x0E17 | RECOMMENDED | LB_LETTERS, // Thai
|
||||
0x0F40 | RECOMMENDED, // Tibt
|
||||
0x14C0 | ASPIRATIONAL, // Cans
|
||||
0xA288 | ASPIRATIONAL | LB_LETTERS, // Yiii
|
||||
0x1703 | EXCLUSION, // Tglg
|
||||
0x1723 | EXCLUSION, // Hano
|
||||
0x1743 | EXCLUSION, // Buhd
|
||||
0x1763 | EXCLUSION, // Tagb
|
||||
0x2800 | UNKNOWN, // Brai
|
||||
0x10800 | EXCLUSION | RTL, // Cprt
|
||||
0x1900 | LIMITED_USE, // Limb
|
||||
0x10000 | EXCLUSION, // Linb
|
||||
0x10480 | EXCLUSION, // Osma
|
||||
0x10450 | EXCLUSION, // Shaw
|
||||
0x1950 | LIMITED_USE | LB_LETTERS, // Tale
|
||||
0x10380 | EXCLUSION, // Ugar
|
||||
0,
|
||||
0x1A00 | EXCLUSION, // Bugi
|
||||
0x2C00 | EXCLUSION | CASED, // Glag
|
||||
0x10A00 | EXCLUSION | RTL, // Khar
|
||||
0xA800 | LIMITED_USE, // Sylo
|
||||
0x1980 | LIMITED_USE | LB_LETTERS, // Talu
|
||||
0x2D30 | ASPIRATIONAL, // Tfng
|
||||
0x103A0 | EXCLUSION, // Xpeo
|
||||
0x1B05 | LIMITED_USE | LB_LETTERS, // Bali
|
||||
0x1BC0 | LIMITED_USE, // Batk
|
||||
0,
|
||||
0x11005 | EXCLUSION, // Brah
|
||||
0xAA00 | LIMITED_USE, // Cham
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0x13153 | EXCLUSION, // Egyp
|
||||
0,
|
||||
0x5B57 | RECOMMENDED | LB_LETTERS, // Hans
|
||||
0x5B57 | RECOMMENDED | LB_LETTERS, // Hant
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0xA984 | LIMITED_USE | LB_LETTERS, // Java
|
||||
0xA90A | LIMITED_USE, // Kali
|
||||
0,
|
||||
0,
|
||||
0x1C00 | LIMITED_USE, // Lepc
|
||||
0,
|
||||
0x0840 | LIMITED_USE | RTL, // Mand
|
||||
0,
|
||||
0x10980 | EXCLUSION | RTL, // Mero
|
||||
0x07CA | LIMITED_USE | RTL, // Nkoo
|
||||
0x10C00 | EXCLUSION | RTL, // Orkh
|
||||
0,
|
||||
0xA840 | EXCLUSION, // Phag
|
||||
0x10900 | EXCLUSION | RTL, // Phnx
|
||||
0x16F00 | ASPIRATIONAL, // Plrd
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0xA549 | LIMITED_USE, // Vaii
|
||||
0,
|
||||
0x12000 | EXCLUSION, // Xsux
|
||||
0,
|
||||
0xFFFF | UNKNOWN, // Zzzz
|
||||
0x102A0 | EXCLUSION, // Cari
|
||||
0x304B | RECOMMENDED | LB_LETTERS, // Jpan
|
||||
0x1A20 | LIMITED_USE | LB_LETTERS, // Lana
|
||||
0x10280 | EXCLUSION, // Lyci
|
||||
0x10920 | EXCLUSION | RTL, // Lydi
|
||||
0x1C5A | LIMITED_USE, // Olck
|
||||
0xA930 | EXCLUSION, // Rjng
|
||||
0xA882 | LIMITED_USE, // Saur
|
||||
0,
|
||||
0x1B83 | LIMITED_USE, // Sund
|
||||
0,
|
||||
0xABC0 | LIMITED_USE, // Mtei
|
||||
0x10840 | EXCLUSION | RTL, // Armi
|
||||
0x10B00 | EXCLUSION | RTL, // Avst
|
||||
0x11103 | LIMITED_USE, // Cakm
|
||||
0xAC00 | RECOMMENDED, // Kore
|
||||
0x11083 | EXCLUSION, // Kthi
|
||||
0,
|
||||
0x10B60 | EXCLUSION | RTL, // Phli
|
||||
0,
|
||||
0,
|
||||
0x10B40 | EXCLUSION | RTL, // Prti
|
||||
0x0800 | EXCLUSION | RTL, // Samr
|
||||
0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt
|
||||
0,
|
||||
0,
|
||||
0xA6A0 | LIMITED_USE, // Bamu
|
||||
0xA4D0 | LIMITED_USE, // Lisu
|
||||
0,
|
||||
0x10A60 | EXCLUSION | RTL, // Sarb
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0x109A0 | EXCLUSION | RTL, // Merc
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0x11183 | EXCLUSION, // Shrd
|
||||
0x110D0 | EXCLUSION, // Sora
|
||||
0x11680 | EXCLUSION, // Takr
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
// End copy-paste from parsescriptmetadata.py
|
||||
};
|
||||
|
||||
// Looks up the packed properties word for a script code.
// Script codes outside the bounds of SCRIPT_PROPS yield 0,
// which is the same word that the table uses for NOT_ENCODED scripts.
int32_t getScriptProps(UScriptCode script) {
    if (script < 0 || script >= LENGTHOF(SCRIPT_PROPS)) {
        return 0;
    }
    return SCRIPT_PROPS[script];
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Writes the sample character of the script into dest as UTF-16.
// Returns 0 without touching dest if *pErrorCode already indicates a failure,
// and sets U_ILLEGAL_ARGUMENT_ERROR for a negative capacity or
// a NULL dest with a positive capacity.
// Otherwise the result and any overflow/termination status come from u_terminateUChars().
U_CAPI int32_t U_EXPORT2
uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(capacity < 0 || (dest == NULL && capacity > 0)) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    // The sample code point occupies the low 21 bits; 0 means that there is none.
    int32_t sampleChar = getScriptProps(script) & 0x1fffff;
    int32_t length = 0;
    if(sampleChar != 0) {
        length = U16_LENGTH(sampleChar);
        // Only write the code point if all of its code units fit.
        if(length <= capacity) {
            int32_t destIndex = 0;
            U16_APPEND_UNSAFE(dest, destIndex, sampleChar);
        }
    }
    return u_terminateUChars(dest, capacity, length, pErrorCode);
}
|
||||
|
||||
// Returns the sample character of the script as a UnicodeString.
// The string is empty when the script's properties word has no sample character.
U_COMMON_API icu::UnicodeString U_EXPORT2
uscript_getSampleUnicodeString(UScriptCode script) {
    // The sample code point occupies the low 21 bits; 0 means that there is none.
    int32_t sampleChar = getScriptProps(script) & 0x1fffff;
    icu::UnicodeString sample;
    if(sampleChar == 0) {
        return sample;
    }
    sample.append(sampleChar);
    return sample;
}
|
||||
|
||||
// Extracts the usage value from bits 23..21 of the script's properties word.
U_CAPI UScriptUsage U_EXPORT2
uscript_getUsage(UScriptCode script) {
    const int32_t usageBits = (getScriptProps(script) >> 21) & 7;
    return static_cast<UScriptUsage>(usageBits);
}
|
||||
|
||||
// Tests the RTL flag in the script's properties word.
U_CAPI UBool U_EXPORT2
uscript_isRightToLeft(UScriptCode script) {
    const int32_t props = getScriptProps(script);
    return (props & RTL) != 0;
}
|
||||
|
||||
// Tests the LB_LETTERS flag in the script's properties word.
U_CAPI UBool U_EXPORT2
uscript_breaksBetweenLetters(UScriptCode script) {
    const int32_t props = getScriptProps(script);
    return (props & LB_LETTERS) != 0;
}
|
||||
|
||||
// Tests the CASED flag in the script's properties word.
U_CAPI UBool U_EXPORT2
uscript_isCased(UScriptCode script) {
    const int32_t props = getScriptProps(script);
    return (props & CASED) != 0;
}
|
|
@ -1,5 +1,5 @@
|
|||
/********************************************************************
|
||||
* Copyright (c) 1997-2012, International Business Machines
|
||||
* Copyright (c) 1997-2013, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
|
@ -527,6 +527,68 @@ void TestGetScriptExtensions() {
|
|||
}
|
||||
}
|
||||
|
||||
void TestScriptMetadataAPI() {
|
||||
/* API & code coverage. More testing in intltest/ucdtest.cpp. */
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
UChar sample[8];
|
||||
|
||||
if(uscript_getSampleString(USCRIPT_LATIN, sample, LENGTHOF(sample), &errorCode)!=1 ||
|
||||
U_FAILURE(errorCode) ||
|
||||
uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
|
||||
sample[1]!=0) {
|
||||
log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
sample[0]=0xfffe;
|
||||
if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
|
||||
errorCode!=U_BUFFER_OVERFLOW_ERROR ||
|
||||
sample[0]!=0xfffe) {
|
||||
log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
errorCode=U_ZERO_ERROR;
|
||||
if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, LENGTHOF(sample), &errorCode)!=0 ||
|
||||
U_FAILURE(errorCode) ||
|
||||
sample[0]!=0) {
|
||||
log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
sample[0]=0xfffe;
|
||||
if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
|
||||
errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
|
||||
sample[0]!=0xfffe) {
|
||||
log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
|
||||
}
|
||||
|
||||
if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
|
||||
uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_ASPIRATIONAL ||
|
||||
uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
|
||||
uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
|
||||
uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
|
||||
uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
|
||||
uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED) {
|
||||
log_err("uscript_getUsage() failed\n");
|
||||
}
|
||||
|
||||
if(uscript_isRightToLeft(USCRIPT_LATIN) ||
|
||||
uscript_isRightToLeft(USCRIPT_CIRTH) ||
|
||||
!uscript_isRightToLeft(USCRIPT_ARABIC) ||
|
||||
!uscript_isRightToLeft(USCRIPT_HEBREW)) {
|
||||
log_err("uscript_isRightToLeft() failed\n");
|
||||
}
|
||||
|
||||
if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
|
||||
uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
|
||||
!uscript_breaksBetweenLetters(USCRIPT_HAN) ||
|
||||
!uscript_breaksBetweenLetters(USCRIPT_THAI)) {
|
||||
log_err("uscript_breaksBetweenLetters() failed\n");
|
||||
}
|
||||
|
||||
if(uscript_isCased(USCRIPT_CIRTH) ||
|
||||
uscript_isCased(USCRIPT_HAN) ||
|
||||
!uscript_isCased(USCRIPT_LATIN) ||
|
||||
!uscript_isCased(USCRIPT_GREEK)) {
|
||||
log_err("uscript_isCased() failed\n");
|
||||
}
|
||||
}
|
||||
|
||||
void TestBinaryValues() {
|
||||
/*
|
||||
* Unicode 5.1 explicitly defines binary property value aliases.
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 2003-2010, International Business Machines Corporation and
|
||||
* Copyright (c) 2003-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
void TestUScriptCodeAPI(void);
|
||||
void TestHasScript(void);
|
||||
void TestGetScriptExtensions(void);
|
||||
void TestScriptMetadataAPI(void);
|
||||
void TestBinaryValues(void);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2012, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/*******************************************************************************
|
||||
|
@ -184,6 +184,7 @@ void addUnicodeTest(TestNode** root)
|
|||
addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
|
||||
addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
|
||||
addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
|
||||
addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI");
|
||||
addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
|
||||
addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
|
||||
addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
|||
#include "unicode/uchar.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "cstring.h"
|
||||
#include "hash.h"
|
||||
#include "patternprops.h"
|
||||
|
@ -59,6 +60,7 @@ void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name,
|
|||
TESTCASE_AUTO(TestBinaryValues);
|
||||
TESTCASE_AUTO(TestConsistency);
|
||||
TESTCASE_AUTO(TestPatternProperties);
|
||||
TESTCASE_AUTO(TestScriptMetadata);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
@ -426,3 +428,73 @@ UnicodeTest::compareUSets(const UnicodeSet &a, const UnicodeSet &b,
|
|||
}
|
||||
return same;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
/**
|
||||
* Maps a special script code to the most common script of its encoded characters.
|
||||
*/
|
||||
UScriptCode getCharScript(UScriptCode script) {
|
||||
switch(script) {
|
||||
case USCRIPT_SIMPLIFIED_HAN:
|
||||
case USCRIPT_TRADITIONAL_HAN:
|
||||
return USCRIPT_HAN;
|
||||
case USCRIPT_JAPANESE:
|
||||
return USCRIPT_HIRAGANA;
|
||||
case USCRIPT_KOREAN:
|
||||
return USCRIPT_HANGUL;
|
||||
default:
|
||||
return script;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void UnicodeTest::TestScriptMetadata() {
|
||||
IcuTestErrorCode errorCode(*this, "TestScriptMetadata()");
|
||||
UnicodeSet rtl("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]", errorCode);
|
||||
// So far, sample characters are uppercase.
|
||||
// Georgian is special.
|
||||
UnicodeSet cased("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]", errorCode);
|
||||
for(int32_t sci = 0; sci < USCRIPT_CODE_LIMIT; ++sci) {
|
||||
UScriptCode sc = (UScriptCode)sci;
|
||||
// Run the test with -v to see which script has failures:
|
||||
// .../intltest$ make && ./intltest utility/UnicodeTest/TestScriptMetadata -v | grep -C 3 FAIL
|
||||
logln(uscript_getShortName(sc));
|
||||
UScriptUsage usage = uscript_getUsage(sc);
|
||||
UnicodeString sample = uscript_getSampleUnicodeString(sc);
|
||||
UnicodeSet scriptSet;
|
||||
scriptSet.applyIntPropertyValue(UCHAR_SCRIPT, sc, errorCode);
|
||||
if(usage == USCRIPT_USAGE_NOT_ENCODED) {
|
||||
assertTrue("not encoded, no sample", sample.isEmpty());
|
||||
assertFalse("not encoded, not RTL", uscript_isRightToLeft(sc));
|
||||
assertFalse("not encoded, not LB letters", uscript_breaksBetweenLetters(sc));
|
||||
assertFalse("not encoded, not cased", uscript_isCased(sc));
|
||||
assertTrue("not encoded, no characters", scriptSet.isEmpty());
|
||||
} else {
|
||||
assertFalse("encoded, has a sample character", sample.isEmpty());
|
||||
UChar32 firstChar = sample.char32At(0);
|
||||
UScriptCode charScript = getCharScript(sc);
|
||||
assertEquals("script(sample(script))",
|
||||
charScript, uscript_getScript(firstChar, errorCode));
|
||||
assertEquals("RTL vs. set", rtl.contains(firstChar), uscript_isRightToLeft(sc));
|
||||
assertEquals("cased vs. set", cased.contains(firstChar), uscript_isCased(sc));
|
||||
assertEquals("encoded, has characters", sc == charScript, !scriptSet.isEmpty());
|
||||
if(uscript_isRightToLeft(sc)) {
|
||||
rtl.removeAll(scriptSet);
|
||||
}
|
||||
if(uscript_isCased(sc)) {
|
||||
cased.removeAll(scriptSet);
|
||||
}
|
||||
}
|
||||
}
|
||||
UnicodeString pattern;
|
||||
assertEquals("no remaining RTL characters",
|
||||
UnicodeString("[]"), rtl.toPattern(pattern));
|
||||
assertEquals("no remaining cased characters",
|
||||
UnicodeString("[]"), cased.toPattern(pattern));
|
||||
|
||||
assertTrue("Hani breaks between letters", uscript_breaksBetweenLetters(USCRIPT_HAN));
|
||||
assertTrue("Thai breaks between letters", uscript_breaksBetweenLetters(USCRIPT_THAI));
|
||||
assertFalse("Latn does not break between letters", uscript_breaksBetweenLetters(USCRIPT_LATIN));
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2013, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
|
@ -37,6 +37,7 @@ public:
|
|||
void TestBinaryValues();
|
||||
void TestConsistency();
|
||||
void TestPatternProperties();
|
||||
void TestScriptMetadata();
|
||||
|
||||
private:
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue