ICU-9065 internal API for reverse-full-case-folding data

X-SVN-Rev: 31237
This commit is contained in:
Markus Scherer 2012-01-20 19:35:13 +00:00
parent 4f84d60612
commit f31fa446bc
4 changed files with 130 additions and 19 deletions

View file

@ -18,6 +18,7 @@
*/
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/uset.h"
#include "unicode/udata.h" /* UDataInfo */
#include "unicode/utf16.h"
@ -392,6 +393,40 @@ ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length
return FALSE; /* string not found */
}
U_NAMESPACE_BEGIN
/**
 * Sets up iteration over the reverse-case-folding ("unfold") table of the
 * case properties singleton.
 *
 * The table begins with a header row from which the number of data rows, the
 * width of a row and the width of the string column are read. Afterwards
 * "unfold" is moved forward by one row width so that it addresses the first
 * data row, and the code point cursor is placed just behind the string column.
 */
FullCaseFoldingIterator::FullCaseFoldingIterator()
        : unfold(reinterpret_cast<const UChar *>(ucase_props_singleton.unfold)),
          unfoldRows(0),
          unfoldRowWidth(0),
          unfoldStringWidth(0),
          currentRow(0),
          rowCpIndex(0) {
    // Read the table geometry from the header row.
    const UChar *header=unfold;
    unfoldRows=header[UCASE_UNFOLD_ROWS];
    unfoldRowWidth=header[UCASE_UNFOLD_ROW_WIDTH];
    unfoldStringWidth=header[UCASE_UNFOLD_STRING_WIDTH];
    // The code points of a row start right after its string column.
    rowCpIndex=unfoldStringWidth;
    // Skip the header row; from here on "unfold" addresses data row 0.
    unfold=header+unfoldRowWidth;
}
/**
 * Delivers the next (code point, full case folding) pair from the unfold table.
 *
 * Each data row holds a folding string in its first unfoldStringWidth UChars
 * (padded with NULs when shorter), followed by the code points that fold to
 * that string; the code point list ends at the row end or at a 0 unit.
 *
 * @param full Set to the folding string of the returned code point. It is set
 *             via UnicodeString::setTo(FALSE, p, length), i.e. it refers to the
 *             table data rather than owning a copy. Left untouched when the
 *             iteration is finished.
 * @return The next code point, or U_SENTINEL (negative) once all rows have
 *         been delivered. Further calls after the end keep returning
 *         U_SENTINEL without reading the table.
 */
UChar32
FullCaseFoldingIterator::next(UnicodeString &full) {
    // Check for exhaustion before touching the table: when all rows have been
    // consumed (or the table has no rows) the row pointer computed below would
    // lie beyond the last row and must not be dereferenced.
    if(currentRow>=unfoldRows) { return U_SENTINEL; }
    // Advance past the last-delivered code point.
    const UChar *p=unfold+(currentRow*unfoldRowWidth);
    if(rowCpIndex>=unfoldRowWidth || p[rowCpIndex]==0) {
        // The current row has no more code points: move on to the next row.
        ++currentRow;
        if(currentRow>=unfoldRows) { return U_SENTINEL; }
        p+=unfoldRowWidth;
        rowCpIndex=unfoldStringWidth;
    }
    // Set "full" to the NUL-terminated string in the first unfold column.
    // Trailing NUL padding is trimmed to find the real string length.
    int32_t length=unfoldStringWidth;
    while(length>0 && p[length-1]==0) { --length; }
    full.setTo(FALSE, p, length);
    // Return the code point and leave rowCpIndex behind it for the next call.
    UChar32 c;
    U16_NEXT_UNSAFE(p, rowCpIndex, c);
    return c;
}
U_NAMESPACE_END
/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
U_CAPI int32_t U_EXPORT2
ucase_getType(const UCaseProps *csp, UChar32 c) {

View file

@ -25,13 +25,23 @@
#include "uset_imp.h"
#include "udataswp.h"
U_CDECL_BEGIN
#ifdef __cplusplus
U_NAMESPACE_BEGIN
class UnicodeString;
U_NAMESPACE_END
#endif
/* library API -------------------------------------------------------------- */
U_CDECL_BEGIN
struct UCaseProps;
typedef struct UCaseProps UCaseProps;
U_CDECL_END
U_CAPI const UCaseProps * U_EXPORT2
ucase_getSingleton(void);
@ -112,6 +122,36 @@ ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa);
U_CFUNC UBool U_EXPORT2
ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa);
#ifdef __cplusplus
U_NAMESPACE_BEGIN
/**
* Iterator over characters with more than one code point in the full default Case_Folding.
*
* Typical use, with c a UChar32 and full a UnicodeString:
*   FullCaseFoldingIterator iter;
*   while((c=iter.next(full))>=0) { ... }
*
* Instances cannot be copied or assigned.
*/
class U_COMMON_API FullCaseFoldingIterator {
public:
/** Constructor. Starts the iteration at the first row of the unfold data of the case properties singleton. */
FullCaseFoldingIterator();
/**
* Returns the next (cp, full) pair where "full" is cp's full default Case_Folding.
* Returns a negative cp value at the end of the iteration.
*
* @param full Output: receives the full default Case_Folding string of the returned cp.
* @return The next code point, or a negative value when there are no more pairs.
*/
UChar32 next(UnicodeString &full);
private:
// Declared but kept private so that the iterator state cannot be duplicated.
FullCaseFoldingIterator(const FullCaseFoldingIterator &); // no copy
FullCaseFoldingIterator &operator=(const FullCaseFoldingIterator &); // no assignment
// Unfold table data; the constructor advances this past the header row
// so that it addresses the first data row.
const UChar *unfold;
// Number of data rows, read from the header at index UCASE_UNFOLD_ROWS.
int32_t unfoldRows;
// Number of UChars per row, read from the header at index UCASE_UNFOLD_ROW_WIDTH.
int32_t unfoldRowWidth;
// Number of UChars in the string column at the start of each row,
// read from the header at index UCASE_UNFOLD_STRING_WIDTH.
int32_t unfoldStringWidth;
// Index of the row that next() is currently reading; starts at 0.
int32_t currentRow;
// Index within the current row of the next code point to deliver;
// starts at unfoldStringWidth, i.e. right behind the string column.
int32_t rowCpIndex;
};
U_NAMESPACE_END
#endif
/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
U_CAPI int32_t U_EXPORT2
ucase_getType(const UCaseProps *csp, UChar32 c);
@ -128,6 +168,8 @@ ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c);
/* string case mapping functions */
U_CDECL_BEGIN
/**
* Iterator function for string case mappings, which need to look at the
* context (surrounding text) of a given character for conditional mappings.
@ -162,6 +204,8 @@ struct UCaseContext {
};
typedef struct UCaseContext UCaseContext;
U_CDECL_END
#define UCASECONTEXT_INITIALIZER { NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
enum {
@ -362,6 +406,4 @@ enum {
UCASE_UNFOLD_STRING_WIDTH
};
U_CDECL_END
#endif

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2002-2009, International Business Machines
* Copyright (C) 2002-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -23,6 +23,7 @@
#include "unicode/ubrk.h"
#include "unicode/unistr.h"
#include "unicode/ucasemap.h"
#include "ucase.h"
#include "ustrtest.h"
#include "unicode/tstdtmod.h"
@ -32,20 +33,16 @@ StringCaseTest::~StringCaseTest() {}
// Test dispatcher for StringCaseTest: logs a suite banner when exec is set and
// selects the test case that belongs to the given index, reporting its name
// through the name output parameter.
// NOTE(review): this hunk shows the removed switch(index)-based dispatch and the
// added TESTCASE_AUTO-based dispatch back to back without +/- markers, so the
// lines below do not form one compilable body as displayed — confirm against
// the repository which lines are present in the committed file.
void
StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
if (exec) logln("TestSuite StringCaseTest: ");
switch (index) {
case 0: name = "TestCaseConversion"; if (exec) TestCaseConversion(); break;
case 1:
#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
name = "TestCasing";
if(exec) TestCasing();
#else
name = "skip";
#endif
break;
default: name = ""; break; //needed to end loop
if(exec) {
logln("TestSuite StringCaseTest: ");
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(TestCaseConversion);
// TestCasing is only registered when break iteration, file I/O and legacy conversion are all configured in.
#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
TESTCASE_AUTO(TestCasing);
#endif
TESTCASE_AUTO(TestFullCaseFoldingIterator);
TESTCASE_AUTO_END;
}
void
@ -540,3 +537,39 @@ StringCaseTest::TestCasing() {
}
#endif
}
void
StringCaseTest::TestFullCaseFoldingIterator() {
UnicodeString ffi=UNICODE_STRING_SIMPLE("ffi");
UnicodeString ss=UNICODE_STRING_SIMPLE("ss");
FullCaseFoldingIterator iter;
int32_t count=0;
int32_t countSpecific=0;
UChar32 c;
UnicodeString full;
while((c=iter.next(full))>=0) {
++count;
// Check that the full Case_Folding has more than 1 code point.
if(!full.hasMoreChar32Than(0, 0x7fffffff, 1)) {
errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c);
continue;
}
// Check that full == Case_Folding(c).
UnicodeString cf(c);
cf.foldCase();
if(full!=cf) {
errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c);
continue;
}
// Spot-check a couple of specific cases.
if((full==ffi && c==0xfb03) || (full==ss && (c==0xdf || c==0x1e9e))) {
++countSpecific;
}
}
if(countSpecific!=3) {
errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases");
}
if(count<70) {
errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count);
}
}

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2011, International Business Machines Corporation and
* Copyright (c) 1997-2012, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -94,7 +94,7 @@ class StringCaseTest: public IntlTest {
public:
StringCaseTest() {}
virtual ~StringCaseTest();
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
void TestCaseConversion();
@ -104,6 +104,7 @@ public:
int32_t whichCase,
void *iter, const char *localeID, uint32_t options);
void TestCasing();
void TestFullCaseFoldingIterator();
};
#endif