ICU-9538 add script metadata properties API

X-SVN-Rev: 33255
This commit is contained in:
Markus Scherer 2013-02-17 00:49:18 +00:00
parent b1309f6306
commit ebbc5423ef
10 changed files with 523 additions and 8 deletions

View file

@ -1,6 +1,6 @@
#******************************************************************************
#
# Copyright (C) 1999-2012, International Business Machines
# Copyright (C) 1999-2013, International Business Machines
# Corporation and others. All Rights Reserved.
#
#******************************************************************************
@ -97,7 +97,7 @@ unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o unorm_it.o \
chariter.o schriter.o uchriter.o uiter.o \
patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
uscript.o usc_impl.o unames.o \
uscript.o uscript_props.o usc_impl.o unames.o \
utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o \
rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \

View file

@ -389,6 +389,7 @@
<ClCompile Include="uprops.cpp" />
<ClCompile Include="usc_impl.c" />
<ClCompile Include="uscript.c" />
<ClCompile Include="uscript_props.cpp" />
<ClCompile Include="uset.cpp" />
<ClCompile Include="uset_props.cpp" />
<ClCompile Include="usetiter.cpp" />

View file

@ -406,6 +406,9 @@
<ClCompile Include="uscript.c">
<Filter>properties &amp; sets</Filter>
</ClCompile>
<ClCompile Include="uscript_props.cpp">
<Filter>properties &amp; sets</Filter>
</ClCompile>
<ClCompile Include="uset.cpp">
<Filter>properties &amp; sets</Filter>
</ClCompile>

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 1997-2012, International Business Machines
* Copyright (C) 1997-2013, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@ -512,4 +512,111 @@ uscript_getScriptExtensions(UChar32 c,
UErrorCode *errorCode);
#endif /* U_HIDE_DRAFT_API */
#ifndef U_HIDE_DRAFT_API
/**
* Script usage constants.
* See UAX #31 Unicode Identifier and Pattern Syntax.
* http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
*
* These are the values returned by uscript_getUsage().
* USCRIPT_USAGE_NOT_ENCODED is the first constant and therefore has the value 0.
*
* @draft ICU 51
*/
typedef enum UScriptUsage {
/** Not encoded in Unicode. @draft ICU 51 */
USCRIPT_USAGE_NOT_ENCODED,
/** Unknown script usage. @draft ICU 51 */
USCRIPT_USAGE_UNKNOWN,
/** Candidate for Exclusion from Identifiers. @draft ICU 51 */
USCRIPT_USAGE_EXCLUDED,
/** Limited Use script. @draft ICU 51 */
USCRIPT_USAGE_LIMITED_USE,
/** Aspirational Use script. @draft ICU 51 */
USCRIPT_USAGE_ASPIRATIONAL,
/** Recommended script. @draft ICU 51 */
USCRIPT_USAGE_RECOMMENDED
} UScriptUsage;
/**
* Writes the script sample character string.
* This string normally consists of one code point but might be longer.
* The string is empty if the script is not encoded.
* The string is also empty if the script code is negative or otherwise
* outside the range of script codes for which metadata is available.
*
* If capacity is negative, or if dest is NULL while capacity is greater than 0,
* then U_ILLEGAL_ARGUMENT_ERROR is set and 0 is returned.
* dest may be NULL if capacity is 0.
*
* @param script script code
* @param dest output string array
* @param capacity number of UChars in the dest array
* @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input
* @return the string length, even if U_BUFFER_OVERFLOW_ERROR
* @draft ICU 51
*/
U_DRAFT int32_t U_EXPORT2
uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode);
#if U_SHOW_CPLUSPLUS_API
U_NAMESPACE_BEGIN
class UnicodeString;
U_NAMESPACE_END
/**
* Returns the script sample character string.
* This string normally consists of one code point but might be longer.
* The string is empty if the script is not encoded.
* The string is also empty if the script code is negative or otherwise
* outside the range of script codes for which metadata is available.
*
* @param script script code
* @return the sample character string
* @draft ICU 51
*/
U_COMMON_API icu::UnicodeString U_EXPORT2
uscript_getSampleUnicodeString(UScriptCode script);
#endif
/**
* Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
* Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.
* Also returns USCRIPT_USAGE_NOT_ENCODED if the script code is negative or otherwise
* outside the range of script codes for which metadata is available.
*
* @param script script code
* @return script usage
* @see UScriptUsage
* @draft ICU 51
*/
U_DRAFT UScriptUsage U_EXPORT2
uscript_getUsage(UScriptCode script);
/**
* Returns TRUE if the script is written right-to-left.
* For example, Arab and Hebr.
* Returns FALSE if the script is not encoded in Unicode.
*
* @param script script code
* @return TRUE if the script is right-to-left
* @draft ICU 51
*/
U_DRAFT UBool U_EXPORT2
uscript_isRightToLeft(UScriptCode script);
/**
* Returns TRUE if the script allows line breaks between letters (excluding hyphenation).
* Such a script typically requires dictionary-based line breaking.
* For example, Hani and Thai.
* Returns FALSE if the script is not encoded in Unicode.
*
* @param script script code
* @return TRUE if the script allows line breaks between letters
* @draft ICU 51
*/
U_DRAFT UBool U_EXPORT2
uscript_breaksBetweenLetters(UScriptCode script);
/**
* Returns TRUE if in modern (or most recent) usage of the script case distinctions are customary.
* For example, Latn and Cyrl.
* Returns FALSE if the script is not encoded in Unicode.
*
* @param script script code
* @return TRUE if the script is cased
* @draft ICU 51
*/
U_DRAFT UBool U_EXPORT2
uscript_isCased(UScriptCode script);
#endif /* U_HIDE_DRAFT_API */
#endif

View file

@ -0,0 +1,267 @@
/*
*******************************************************************************
* Copyright (C) 2013, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: uscript_props.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2013feb16
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/uscript.h"
#include "unicode/utf16.h"
#include "ustr_imp.h"
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
namespace {
// Script metadata (script properties).
// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
// Each script's properties are packed into one int32_t with the layout below.
// 0 = NOT_ENCODED, no sample character, default false script properties.
// Bits 20.. 0: sample character
// Bits 23..21: usage
// The usage values 1..5 are in the same order as the UScriptUsage constants
// after USCRIPT_USAGE_NOT_ENCODED, so that uscript_getUsage() can shift the
// field down and cast it to UScriptUsage.
const int32_t UNKNOWN = 1 << 21;
const int32_t EXCLUSION = 2 << 21;
const int32_t LIMITED_USE = 3 << 21;
const int32_t ASPIRATIONAL = 4 << 21;
const int32_t RECOMMENDED = 5 << 21;
// Bits 31..24: Single-bit flags
// RTL: tested by uscript_isRightToLeft()
const int32_t RTL = 1 << 24;
// LB_LETTERS: tested by uscript_breaksBetweenLetters()
const int32_t LB_LETTERS = 1 << 25;
// CASED: tested by uscript_isCased()
const int32_t CASED = 1 << 26;
// Packed properties per script, indexed by the numeric UScriptCode value
// (see getScriptProps()).
// Each entry ORs together the sample character, one usage value,
// and any of the RTL, LB_LETTERS and CASED flags.
// A plain 0 entry is a script without metadata (NOT_ENCODED).
// This table is generated; do not edit entries by hand.
const int32_t SCRIPT_PROPS[] = {
// Begin copy-paste output from
// icu/tools/trunk/unicode/py/parsescriptmetadata.py
0x0040 | UNKNOWN, // Zyyy
0x0308 | UNKNOWN, // Zinh
0x0628 | RECOMMENDED | RTL, // Arab
0x0531 | RECOMMENDED | CASED, // Armn
0x0995 | RECOMMENDED, // Beng
0x3105 | RECOMMENDED | LB_LETTERS, // Bopo
0x13C4 | LIMITED_USE, // Cher
0x03E2 | EXCLUSION | CASED, // Copt
0x042F | RECOMMENDED | CASED, // Cyrl
0x10414 | EXCLUSION | CASED, // Dsrt
0x0905 | RECOMMENDED, // Deva
0x12A0 | RECOMMENDED, // Ethi
0x10D3 | RECOMMENDED, // Geor
0x10330 | EXCLUSION, // Goth
0x03A9 | RECOMMENDED | CASED, // Grek
0x0A95 | RECOMMENDED, // Gujr
0x0A15 | RECOMMENDED, // Guru
0x5B57 | RECOMMENDED | LB_LETTERS, // Hani
0xAC00 | RECOMMENDED, // Hang
0x05D0 | RECOMMENDED | RTL, // Hebr
0x304B | RECOMMENDED | LB_LETTERS, // Hira
0x0C95 | RECOMMENDED, // Knda
0x30AB | RECOMMENDED | LB_LETTERS, // Kana
0x1780 | RECOMMENDED | LB_LETTERS, // Khmr
0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo
0x004C | RECOMMENDED | CASED, // Latn
0x0D15 | RECOMMENDED, // Mlym
0x1826 | ASPIRATIONAL, // Mong
0x1000 | RECOMMENDED | LB_LETTERS, // Mymr
0x168F | EXCLUSION, // Ogam
0x10300 | EXCLUSION, // Ital
0x0B15 | RECOMMENDED, // Orya
0x16A0 | EXCLUSION, // Runr
0x0D85 | RECOMMENDED, // Sinh
0x0710 | LIMITED_USE | RTL, // Syrc
0x0B95 | RECOMMENDED, // Taml
0x0C15 | RECOMMENDED, // Telu
0x078C | RECOMMENDED | RTL, // Thaa
0x0E17 | RECOMMENDED | LB_LETTERS, // Thai
0x0F40 | RECOMMENDED, // Tibt
0x14C0 | ASPIRATIONAL, // Cans
0xA288 | ASPIRATIONAL | LB_LETTERS, // Yiii
0x1703 | EXCLUSION, // Tglg
0x1723 | EXCLUSION, // Hano
0x1743 | EXCLUSION, // Buhd
0x1763 | EXCLUSION, // Tagb
0x2800 | UNKNOWN, // Brai
0x10800 | EXCLUSION | RTL, // Cprt
0x1900 | LIMITED_USE, // Limb
0x10000 | EXCLUSION, // Linb
0x10480 | EXCLUSION, // Osma
0x10450 | EXCLUSION, // Shaw
0x1950 | LIMITED_USE | LB_LETTERS, // Tale
0x10380 | EXCLUSION, // Ugar
0,
0x1A00 | EXCLUSION, // Bugi
0x2C00 | EXCLUSION | CASED, // Glag
0x10A00 | EXCLUSION | RTL, // Khar
0xA800 | LIMITED_USE, // Sylo
0x1980 | LIMITED_USE | LB_LETTERS, // Talu
0x2D30 | ASPIRATIONAL, // Tfng
0x103A0 | EXCLUSION, // Xpeo
0x1B05 | LIMITED_USE | LB_LETTERS, // Bali
0x1BC0 | LIMITED_USE, // Batk
0,
0x11005 | EXCLUSION, // Brah
0xAA00 | LIMITED_USE, // Cham
0,
0,
0,
0,
0x13153 | EXCLUSION, // Egyp
0,
0x5B57 | RECOMMENDED | LB_LETTERS, // Hans
0x5B57 | RECOMMENDED | LB_LETTERS, // Hant
0,
0,
0,
0xA984 | LIMITED_USE | LB_LETTERS, // Java
0xA90A | LIMITED_USE, // Kali
0,
0,
0x1C00 | LIMITED_USE, // Lepc
0,
0x0840 | LIMITED_USE | RTL, // Mand
0,
0x10980 | EXCLUSION | RTL, // Mero
0x07CA | LIMITED_USE | RTL, // Nkoo
0x10C00 | EXCLUSION | RTL, // Orkh
0,
0xA840 | EXCLUSION, // Phag
0x10900 | EXCLUSION | RTL, // Phnx
0x16F00 | ASPIRATIONAL, // Plrd
0,
0,
0,
0,
0,
0,
0xA549 | LIMITED_USE, // Vaii
0,
0x12000 | EXCLUSION, // Xsux
0,
0xFFFF | UNKNOWN, // Zzzz
0x102A0 | EXCLUSION, // Cari
0x304B | RECOMMENDED | LB_LETTERS, // Jpan
0x1A20 | LIMITED_USE | LB_LETTERS, // Lana
0x10280 | EXCLUSION, // Lyci
0x10920 | EXCLUSION | RTL, // Lydi
0x1C5A | LIMITED_USE, // Olck
0xA930 | EXCLUSION, // Rjng
0xA882 | LIMITED_USE, // Saur
0,
0x1B83 | LIMITED_USE, // Sund
0,
0xABC0 | LIMITED_USE, // Mtei
0x10840 | EXCLUSION | RTL, // Armi
0x10B00 | EXCLUSION | RTL, // Avst
0x11103 | LIMITED_USE, // Cakm
0xAC00 | RECOMMENDED, // Kore
0x11083 | EXCLUSION, // Kthi
0,
0x10B60 | EXCLUSION | RTL, // Phli
0,
0,
0x10B40 | EXCLUSION | RTL, // Prti
0x0800 | EXCLUSION | RTL, // Samr
0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt
0,
0,
0xA6A0 | LIMITED_USE, // Bamu
0xA4D0 | LIMITED_USE, // Lisu
0,
0x10A60 | EXCLUSION | RTL, // Sarb
0,
0,
0,
0,
0,
0,
0,
0x109A0 | EXCLUSION | RTL, // Merc
0,
0,
0,
0,
0,
0,
0,
0,
0,
0x11183 | EXCLUSION, // Shrd
0x110D0 | EXCLUSION, // Sora
0x11680 | EXCLUSION, // Takr
0,
0,
0,
0,
0,
// End copy-paste from parsescriptmetadata.py
};
// Looks up the packed properties word for a script code.
// Script codes that are negative or beyond the end of SCRIPT_PROPS
// yield 0, which is the same value as a "not encoded" table entry.
int32_t getScriptProps(UScriptCode script) {
    if (script < 0 || LENGTHOF(SCRIPT_PROPS) <= script) {
        return 0;
    }
    return SCRIPT_PROPS[script];
}
} // namespace
// Copies the script's sample character into dest as UTF-16 and returns its length.
// The sample is written only when it fits into capacity; NUL-termination and the
// overflow/not-terminated status are delegated to u_terminateUChars().
// Returns 0 without touching dest if *pErrorCode is already a failure, or
// (setting U_ILLEGAL_ARGUMENT_ERROR) if capacity/dest are inconsistent.
U_CAPI int32_t U_EXPORT2
uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(capacity < 0 || (dest == NULL && capacity > 0)) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    // Bits 20..0 of the packed properties hold the sample code point; 0 means "none".
    int32_t codePoint = getScriptProps(script) & 0x1fffff;
    int32_t sampleLength = 0;
    if(codePoint != 0) {
        sampleLength = U16_LENGTH(codePoint);
        if(capacity >= sampleLength) {
            int32_t offset = 0;
            U16_APPEND_UNSAFE(dest, offset, codePoint);
        }
    }
    return u_terminateUChars(dest, capacity, sampleLength, pErrorCode);
}
// Returns the script's sample character as a UnicodeString.
// The result is empty when the packed properties carry no sample character.
U_COMMON_API icu::UnicodeString U_EXPORT2
uscript_getSampleUnicodeString(UScriptCode script) {
    icu::UnicodeString result;
    // Bits 20..0 of the packed properties hold the sample code point; 0 means "none".
    int32_t codePoint = getScriptProps(script) & 0x1fffff;
    if(codePoint == 0) {
        return result;
    }
    result.append(codePoint);
    return result;
}
// Extracts the usage field (bits 23..21 of the packed properties)
// and returns it as a UScriptUsage value.
U_CAPI UScriptUsage U_EXPORT2
uscript_getUsage(UScriptCode script) {
    const int32_t usageField = (getScriptProps(script) >> 21) & 7;
    return static_cast<UScriptUsage>(usageField);
}
// Reports whether the RTL flag is set in the script's packed properties.
U_CAPI UBool U_EXPORT2
uscript_isRightToLeft(UScriptCode script) {
    const int32_t props = getScriptProps(script);
    return (props & RTL) != 0;
}
// Reports whether the LB_LETTERS flag is set in the script's packed properties.
U_CAPI UBool U_EXPORT2
uscript_breaksBetweenLetters(UScriptCode script) {
    const int32_t props = getScriptProps(script);
    return (props & LB_LETTERS) != 0;
}
// Reports whether the CASED flag is set in the script's packed properties.
U_CAPI UBool U_EXPORT2
uscript_isCased(UScriptCode script) {
    const int32_t props = getScriptProps(script);
    return (props & CASED) != 0;
}

View file

@ -1,5 +1,5 @@
/********************************************************************
* Copyright (c) 1997-2012, International Business Machines
* Copyright (c) 1997-2013, International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************/
@ -527,6 +527,68 @@ void TestGetScriptExtensions() {
}
}
/*
 * API and code coverage test for the script metadata functions:
 * uscript_getSampleString(), uscript_getUsage(), uscript_isRightToLeft(),
 * uscript_breaksBetweenLetters() and uscript_isCased().
 * Out-of-range script codes are checked at both ends:
 * USCRIPT_INVALID_CODE and USCRIPT_CODE_LIMIT.
 */
void TestScriptMetadataAPI() {
    /* API & code coverage. More testing in intltest/ucdtest.cpp. */
    UErrorCode errorCode=U_ZERO_ERROR;
    UChar sample[8];

    /* Normal case: one Latin sample character, NUL-terminated. */
    if(uscript_getSampleString(USCRIPT_LATIN, sample, LENGTHOF(sample), &errorCode)!=1 ||
            U_FAILURE(errorCode) ||
            uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
            sample[1]!=0) {
        log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
    }
    /* Preflighting: the length is still returned and the buffer must stay untouched. */
    sample[0]=0xfffe;
    if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
            errorCode!=U_BUFFER_OVERFLOW_ERROR ||
            sample[0]!=0xfffe) {
        log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
    }
    /* The previous call left a failure code behind; reset it before continuing. */
    errorCode=U_ZERO_ERROR;
    /* Lower out-of-range boundary: empty, NUL-terminated result. */
    if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, LENGTHOF(sample), &errorCode)!=0 ||
            U_FAILURE(errorCode) ||
            sample[0]!=0) {
        log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
    }
    /* Upper out-of-range boundary with no room for the terminating NUL. */
    sample[0]=0xfffe;
    if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
            errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
            sample[0]!=0xfffe) {
        log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
    }

    /*
     * One script per usage level, plus both out-of-range boundaries.
     * The last check uses USCRIPT_CODE_LIMIT rather than repeating
     * USCRIPT_INVALID_CODE, so that the upper bound is covered as well.
     */
    if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
            uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_ASPIRATIONAL ||
            uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
            uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
            uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
            uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
            uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
        log_err("uscript_getUsage() failed\n");
    }

    if(uscript_isRightToLeft(USCRIPT_LATIN) ||
            uscript_isRightToLeft(USCRIPT_CIRTH) ||
            !uscript_isRightToLeft(USCRIPT_ARABIC) ||
            !uscript_isRightToLeft(USCRIPT_HEBREW)) {
        log_err("uscript_isRightToLeft() failed\n");
    }

    if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
            uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
            !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
            !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
        log_err("uscript_breaksBetweenLetters() failed\n");
    }

    if(uscript_isCased(USCRIPT_CIRTH) ||
            uscript_isCased(USCRIPT_HAN) ||
            !uscript_isCased(USCRIPT_LATIN) ||
            !uscript_isCased(USCRIPT_GREEK)) {
        log_err("uscript_isCased() failed\n");
    }
}
void TestBinaryValues() {
/*
* Unicode 5.1 explicitly defines binary property value aliases.

View file

@ -1,10 +1,11 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 2003-2010, International Business Machines Corporation and
* Copyright (c) 2003-2013, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
void TestUScriptCodeAPI(void);
void TestHasScript(void);
void TestGetScriptExtensions(void);
void TestScriptMetadataAPI(void);
void TestBinaryValues(void);

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2012, International Business Machines Corporation and
* Copyright (c) 1997-2013, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/*******************************************************************************
@ -184,6 +184,7 @@ void addUnicodeTest(TestNode** root)
addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI");
addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2011, International Business Machines Corporation and
* Copyright (c) 1997-2013, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -8,6 +8,7 @@
#include "unicode/uchar.h"
#include "unicode/uniset.h"
#include "unicode/putil.h"
#include "unicode/uscript.h"
#include "cstring.h"
#include "hash.h"
#include "patternprops.h"
@ -59,6 +60,7 @@ void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name,
TESTCASE_AUTO(TestBinaryValues);
TESTCASE_AUTO(TestConsistency);
TESTCASE_AUTO(TestPatternProperties);
TESTCASE_AUTO(TestScriptMetadata);
TESTCASE_AUTO_END;
}
@ -426,3 +428,73 @@ UnicodeTest::compareUSets(const UnicodeSet &a, const UnicodeSet &b,
}
return same;
}
namespace {

/**
 * Maps a special script code to the most common script of its encoded characters.
 * Hans and Hant map to Hani, Jpan maps to Hira, and Kore maps to Hang;
 * every other script code is returned unchanged.
 */
UScriptCode getCharScript(UScriptCode script) {
    if(script == USCRIPT_SIMPLIFIED_HAN || script == USCRIPT_TRADITIONAL_HAN) {
        return USCRIPT_HAN;
    }
    if(script == USCRIPT_JAPANESE) {
        return USCRIPT_HIRAGANA;
    }
    if(script == USCRIPT_KOREAN) {
        return USCRIPT_HANGUL;
    }
    return script;
}

}  // namespace
void UnicodeTest::TestScriptMetadata() {
IcuTestErrorCode errorCode(*this, "TestScriptMetadata()");
UnicodeSet rtl("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]", errorCode);
// So far, sample characters are uppercase.
// Georgian is special.
UnicodeSet cased("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]", errorCode);
for(int32_t sci = 0; sci < USCRIPT_CODE_LIMIT; ++sci) {
UScriptCode sc = (UScriptCode)sci;
// Run the test with -v to see which script has failures:
// .../intltest$ make && ./intltest utility/UnicodeTest/TestScriptMetadata -v | grep -C 3 FAIL
logln(uscript_getShortName(sc));
UScriptUsage usage = uscript_getUsage(sc);
UnicodeString sample = uscript_getSampleUnicodeString(sc);
UnicodeSet scriptSet;
scriptSet.applyIntPropertyValue(UCHAR_SCRIPT, sc, errorCode);
if(usage == USCRIPT_USAGE_NOT_ENCODED) {
assertTrue("not encoded, no sample", sample.isEmpty());
assertFalse("not encoded, not RTL", uscript_isRightToLeft(sc));
assertFalse("not encoded, not LB letters", uscript_breaksBetweenLetters(sc));
assertFalse("not encoded, not cased", uscript_isCased(sc));
assertTrue("not encoded, no characters", scriptSet.isEmpty());
} else {
assertFalse("encoded, has a sample character", sample.isEmpty());
UChar32 firstChar = sample.char32At(0);
UScriptCode charScript = getCharScript(sc);
assertEquals("script(sample(script))",
charScript, uscript_getScript(firstChar, errorCode));
assertEquals("RTL vs. set", rtl.contains(firstChar), uscript_isRightToLeft(sc));
assertEquals("cased vs. set", cased.contains(firstChar), uscript_isCased(sc));
assertEquals("encoded, has characters", sc == charScript, !scriptSet.isEmpty());
if(uscript_isRightToLeft(sc)) {
rtl.removeAll(scriptSet);
}
if(uscript_isCased(sc)) {
cased.removeAll(scriptSet);
}
}
}
UnicodeString pattern;
assertEquals("no remaining RTL characters",
UnicodeString("[]"), rtl.toPattern(pattern));
assertEquals("no remaining cased characters",
UnicodeString("[]"), cased.toPattern(pattern));
assertTrue("Hani breaks between letters", uscript_breaksBetweenLetters(USCRIPT_HAN));
assertTrue("Thai breaks between letters", uscript_breaksBetweenLetters(USCRIPT_THAI));
assertFalse("Latn does not break between letters", uscript_breaksBetweenLetters(USCRIPT_LATIN));
}

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2011, International Business Machines Corporation and
* Copyright (c) 1997-2013, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -37,6 +37,7 @@ public:
void TestBinaryValues();
void TestConsistency();
void TestPatternProperties();
void TestScriptMetadata();
private: