mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 23:10:40 +00:00
ICU-9065 internal API for reverse-full-case-folding data
X-SVN-Rev: 31237
This commit is contained in:
parent
4f84d60612
commit
f31fa446bc
4 changed files with 130 additions and 19 deletions
|
@ -18,6 +18,7 @@
|
|||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "unicode/udata.h" /* UDataInfo */
|
||||
#include "unicode/utf16.h"
|
||||
|
@ -392,6 +393,40 @@ ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length
|
|||
return FALSE; /* string not found */
|
||||
}
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
 * Sets up iteration over the singleton case properties' unfold table.
 *
 * The table starts with a header row from which the number of data rows,
 * the row width and the string-column width are read. Afterwards "unfold"
 * is moved forward by one row width so that it addresses the first data row,
 * and the code point index starts right behind the string column.
 */
FullCaseFoldingIterator::FullCaseFoldingIterator()
        : unfold(NULL),
          unfoldRows(0),
          unfoldRowWidth(0),
          unfoldStringWidth(0),
          currentRow(0),
          rowCpIndex(0) {
    // View the raw unfold data as UTF-16 code units; the header comes first.
    const UChar *header=reinterpret_cast<const UChar *>(ucase_props_singleton.unfold);

    unfoldRows=header[UCASE_UNFOLD_ROWS];
    unfoldRowWidth=header[UCASE_UNFOLD_ROW_WIDTH];
    unfoldStringWidth=header[UCASE_UNFOLD_STRING_WIDTH];

    // The first code point of a row sits immediately after its string column.
    rowCpIndex=unfoldStringWidth;

    // Skip the header row: it occupies exactly one row width.
    unfold=header+unfoldRowWidth;
}
|
||||
|
||||
/**
 * Delivers the next (code point, full case folding) pair from the unfold table.
 *
 * Each table row starts with a string column of unfoldStringWidth code units
 * (padded with NULs), followed by code points up to unfoldRowWidth; the list
 * of code points ends at the first NUL or at the end of the row.
 *
 * @param full Receives the folding string of the current row. It is set via
 *             UnicodeString::setTo(FALSE, p, length), i.e. it refers to the
 *             table data rather than owning a copy. Left untouched when the
 *             iteration is finished.
 * @return The next code point, or U_SENTINEL (negative) when there are no more
 *         pairs. Once U_SENTINEL has been returned, every further call returns
 *         U_SENTINEL again.
 */
UChar32
FullCaseFoldingIterator::next(UnicodeString &full) {
    // Already exhausted: return right away instead of inspecting memory
    // behind the last row or advancing currentRow any further.
    if(currentRow>=unfoldRows) { return U_SENTINEL; }
    // Advance past the last-delivered code point.
    const UChar *p=unfold+(currentRow*unfoldRowWidth);
    if(rowCpIndex>=unfoldRowWidth || p[rowCpIndex]==0) {
        // No more code points in this row: move on to the next one.
        ++currentRow;
        if(currentRow>=unfoldRows) { return U_SENTINEL; }
        p+=unfoldRowWidth;
        rowCpIndex=unfoldStringWidth;
    }
    // Set "full" to the NUL-terminated string in the first unfold column.
    int32_t length=unfoldStringWidth;
    while(length>0 && p[length-1]==0) { --length; }  // trim the NUL padding
    full.setTo(FALSE, p, length);
    // Return the code point; U16_NEXT_UNSAFE also advances rowCpIndex past it.
    UChar32 c;
    U16_NEXT_UNSAFE(p, rowCpIndex, c);
    return c;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucase_getType(const UCaseProps *csp, UChar32 c) {
|
||||
|
|
|
@ -25,13 +25,23 @@
|
|||
#include "uset_imp.h"
|
||||
#include "udataswp.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
#ifdef __cplusplus
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UnicodeString;
|
||||
|
||||
U_NAMESPACE_END
|
||||
#endif
|
||||
|
||||
/* library API -------------------------------------------------------------- */
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
struct UCaseProps;
|
||||
typedef struct UCaseProps UCaseProps;
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
U_CAPI const UCaseProps * U_EXPORT2
|
||||
ucase_getSingleton(void);
|
||||
|
||||
|
@ -112,6 +122,36 @@ ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa);
|
|||
U_CFUNC UBool U_EXPORT2
|
||||
ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa);
|
||||
|
||||
#ifdef __cplusplus
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* Iterator over characters with more than one code point in the full default Case_Folding.
|
||||
*/
|
||||
class U_COMMON_API FullCaseFoldingIterator {
|
||||
public:
|
||||
/** Constructor. */
|
||||
FullCaseFoldingIterator();
|
||||
/**
|
||||
* Returns the next (cp, full) pair where "full" is cp's full default Case_Folding.
|
||||
* Returns a negative cp value at the end of the iteration.
|
||||
*/
|
||||
UChar32 next(UnicodeString &full);
|
||||
private:
|
||||
FullCaseFoldingIterator(const FullCaseFoldingIterator &); // no copy
|
||||
FullCaseFoldingIterator &operator=(const FullCaseFoldingIterator &); // no assignment
|
||||
|
||||
const UChar *unfold;
|
||||
int32_t unfoldRows;
|
||||
int32_t unfoldRowWidth;
|
||||
int32_t unfoldStringWidth;
|
||||
int32_t currentRow;
|
||||
int32_t rowCpIndex;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
#endif
|
||||
|
||||
/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucase_getType(const UCaseProps *csp, UChar32 c);
|
||||
|
@ -128,6 +168,8 @@ ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c);
|
|||
|
||||
/* string case mapping functions */
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
|
||||
* Iterator function for string case mappings, which need to look at the
|
||||
* context (surrounding text) of a given character for conditional mappings.
|
||||
|
@ -162,6 +204,8 @@ struct UCaseContext {
|
|||
};
|
||||
typedef struct UCaseContext UCaseContext;
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#define UCASECONTEXT_INITIALIZER { NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
|
||||
|
||||
enum {
|
||||
|
@ -362,6 +406,4 @@ enum {
|
|||
UCASE_UNFOLD_STRING_WIDTH
|
||||
};
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2009, International Business Machines
|
||||
* Copyright (C) 2002-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -23,6 +23,7 @@
|
|||
#include "unicode/ubrk.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "ucase.h"
|
||||
#include "ustrtest.h"
|
||||
#include "unicode/tstdtmod.h"
|
||||
|
||||
|
@ -32,20 +33,16 @@ StringCaseTest::~StringCaseTest() {}
|
|||
|
||||
void
|
||||
StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
|
||||
if (exec) logln("TestSuite StringCaseTest: ");
|
||||
switch (index) {
|
||||
case 0: name = "TestCaseConversion"; if (exec) TestCaseConversion(); break;
|
||||
case 1:
|
||||
#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
|
||||
name = "TestCasing";
|
||||
if(exec) TestCasing();
|
||||
#else
|
||||
name = "skip";
|
||||
#endif
|
||||
break;
|
||||
|
||||
default: name = ""; break; //needed to end loop
|
||||
if(exec) {
|
||||
logln("TestSuite StringCaseTest: ");
|
||||
}
|
||||
TESTCASE_AUTO_BEGIN;
|
||||
TESTCASE_AUTO(TestCaseConversion);
|
||||
#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
|
||||
TESTCASE_AUTO(TestCasing);
|
||||
#endif
|
||||
TESTCASE_AUTO(TestFullCaseFoldingIterator);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -540,3 +537,39 @@ StringCaseTest::TestCasing() {
|
|||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
StringCaseTest::TestFullCaseFoldingIterator() {
|
||||
UnicodeString ffi=UNICODE_STRING_SIMPLE("ffi");
|
||||
UnicodeString ss=UNICODE_STRING_SIMPLE("ss");
|
||||
FullCaseFoldingIterator iter;
|
||||
int32_t count=0;
|
||||
int32_t countSpecific=0;
|
||||
UChar32 c;
|
||||
UnicodeString full;
|
||||
while((c=iter.next(full))>=0) {
|
||||
++count;
|
||||
// Check that the full Case_Folding has more than 1 code point.
|
||||
if(!full.hasMoreChar32Than(0, 0x7fffffff, 1)) {
|
||||
errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c);
|
||||
continue;
|
||||
}
|
||||
// Check that full == Case_Folding(c).
|
||||
UnicodeString cf(c);
|
||||
cf.foldCase();
|
||||
if(full!=cf) {
|
||||
errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c);
|
||||
continue;
|
||||
}
|
||||
// Spot-check a couple of specific cases.
|
||||
if((full==ffi && c==0xfb03) || (full==ss && (c==0xdf || c==0x1e9e))) {
|
||||
++countSpecific;
|
||||
}
|
||||
}
|
||||
if(countSpecific!=3) {
|
||||
errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases");
|
||||
}
|
||||
if(count<70) {
|
||||
errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2012, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
|
@ -94,7 +94,7 @@ class StringCaseTest: public IntlTest {
|
|||
public:
|
||||
StringCaseTest() {}
|
||||
virtual ~StringCaseTest();
|
||||
|
||||
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
|
||||
|
||||
void TestCaseConversion();
|
||||
|
@ -104,6 +104,7 @@ public:
|
|||
int32_t whichCase,
|
||||
void *iter, const char *localeID, uint32_t options);
|
||||
void TestCasing();
|
||||
void TestFullCaseFoldingIterator();
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Add table
Reference in a new issue