mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-1930 genUCA now reads the new Fractional UCA
X-SVN-Rev: 8873
This commit is contained in:
parent
0441e94935
commit
a4735aec3b
2 changed files with 81 additions and 19 deletions
|
@ -336,7 +336,7 @@ static int32_t hex2num(char hex) {
|
|||
}
|
||||
}
|
||||
|
||||
UCAElements *readAnElement(FILE *data, UErrorCode *status) {
|
||||
UCAElements *readAnElement(FILE *data, tempUCATable *t, UCAConstants *consts, UErrorCode *status) {
|
||||
char buffer[2048], primary[100], secondary[100], tertiary[100];
|
||||
UBool detectedContraction;
|
||||
int32_t i = 0;
|
||||
|
@ -366,21 +366,37 @@ UCAElements *readAnElement(FILE *data, UErrorCode *status) {
|
|||
|
||||
UCAElements *element = ≤ //(UCAElements *)malloc(sizeof(UCAElements));
|
||||
|
||||
// Directives.
|
||||
if(buffer[0] == '[') {
|
||||
const char *vt = "[variable top = ";
|
||||
uint32_t vtLen = (uint32_t)uprv_strlen(vt);
|
||||
if(uprv_strncmp(buffer, vt, vtLen) == 0) {
|
||||
uint32_t cnt = 0;
|
||||
struct {
|
||||
char name[256];
|
||||
uint32_t *what;
|
||||
} vt[3] = { {"[variable top = ", &t->options->variableTopValue},
|
||||
{"[FIRST_IMPLICIT= ", &consts->UCA_PRIMARY_IMPLICIT_MIN},
|
||||
{"[LAST_IMPLICIT= ", &consts->UCA_PRIMARY_IMPLICIT_MAX}
|
||||
};
|
||||
for (cnt = 0; cnt<sizeof(vt)/sizeof(vt[0]); cnt++) {
|
||||
uint32_t vtLen = (uint32_t)uprv_strlen(vt[cnt].name);
|
||||
if(uprv_strncmp(buffer, vt[cnt].name, vtLen) == 0) {
|
||||
element->variableTop = TRUE;
|
||||
if(sscanf(buffer+vtLen, "%4x", &theValue) != 1) /* read first code point */
|
||||
{
|
||||
fprintf(stderr, " scanf(hex) failed!\n ");
|
||||
}
|
||||
element->cPoints[0] = (UChar)theValue;
|
||||
return element; // just a comment, skip whole line
|
||||
} else {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
*(vt[cnt].what) = (UChar)theValue;
|
||||
if(cnt == 1) { // first implicit
|
||||
// we need to set the value for top next
|
||||
//uint32_t nextTop = ucol_prv_calculateImplicitPrimary(0x4E00); // CJK base
|
||||
consts->UCA_NEXT_TOP_VALUE = theValue<<24 | 0x030303;
|
||||
}
|
||||
//element->cPoints[0] = (UChar)theValue;
|
||||
//return element;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
element->variableTop = FALSE;
|
||||
|
||||
|
@ -509,6 +525,7 @@ UCAElements *readAnElement(FILE *data, UErrorCode *status) {
|
|||
|
||||
|
||||
void writeOutData(UCATableHeader *data,
|
||||
UCAConstants *consts,
|
||||
UChar contractions[][3],
|
||||
uint32_t noOfcontractions,
|
||||
const char *outputDir,
|
||||
|
@ -528,8 +545,9 @@ void writeOutData(UCATableHeader *data,
|
|||
noOfcontractions++;
|
||||
|
||||
|
||||
data->contractionUCACombos = size;
|
||||
data->size += paddedsize((noOfcontractions*3*sizeof(UChar)));
|
||||
data->UCAConsts = size;
|
||||
//data->contractionUCACombos = size;
|
||||
data->size += paddedsize(sizeof(UCAConstants)+(noOfcontractions*3*sizeof(UChar)));
|
||||
}
|
||||
|
||||
UNewDataMemory *pData;
|
||||
|
@ -553,6 +571,9 @@ void writeOutData(UCATableHeader *data,
|
|||
}
|
||||
udata_writeBlock(pData, data, size);
|
||||
|
||||
// output the constants here
|
||||
udata_writeBlock(pData, consts, sizeof(UCAConstants));
|
||||
|
||||
if(noOfcontractions != 0) {
|
||||
udata_writeBlock(pData, contractions, noOfcontractions*3*sizeof(UChar));
|
||||
udata_writePadding(pData, paddedsize((noOfcontractions*3*sizeof(UChar))) - noOfcontractions*3*sizeof(uint16_t));
|
||||
|
@ -580,6 +601,37 @@ write_uca_table(const char *filename,
|
|||
UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
|
||||
UChar contractionCEs[256][3];
|
||||
uint32_t noOfContractions = 0;
|
||||
UCAConstants consts = {
|
||||
UCOL_RESET_TOP_VALUE,
|
||||
/*
|
||||
UCOL_FIRST_PRIMARY_IGNORABLE,
|
||||
UCOL_LAST_PRIMARY_IGNORABLE,
|
||||
UCOL_LAST_PRIMARY_IGNORABLE_CONT,
|
||||
UCOL_FIRST_SECONDARY_IGNORABLE,
|
||||
UCOL_LAST_SECONDARY_IGNORABLE,
|
||||
UCOL_FIRST_TERTIARY_IGNORABLE,
|
||||
UCOL_LAST_TERTIARY_IGNORABLE,
|
||||
UCOL_FIRST_VARIABLE,
|
||||
UCOL_LAST_VARIABLE,
|
||||
UCOL_FIRST_NON_VARIABLE,
|
||||
UCOL_LAST_NON_VARIABLE,
|
||||
*/
|
||||
|
||||
UCOL_NEXT_TOP_VALUE,
|
||||
/*
|
||||
UCOL_NEXT_FIRST_PRIMARY_IGNORABLE,
|
||||
UCOL_NEXT_LAST_PRIMARY_IGNORABLE,
|
||||
UCOL_NEXT_FIRST_SECONDARY_IGNORABLE,
|
||||
UCOL_NEXT_LAST_SECONDARY_IGNORABLE,
|
||||
UCOL_NEXT_FIRST_TERTIARY_IGNORABLE,
|
||||
UCOL_NEXT_LAST_TERTIARY_IGNORABLE,
|
||||
UCOL_NEXT_FIRST_VARIABLE,
|
||||
UCOL_NEXT_LAST_VARIABLE,
|
||||
*/
|
||||
|
||||
PRIMARY_IMPLICIT_MIN,
|
||||
PRIMARY_IMPLICIT_MAX
|
||||
};
|
||||
|
||||
|
||||
if(data == NULL) {
|
||||
|
@ -634,9 +686,10 @@ struct {
|
|||
{0xAC00, 0xD7B0, UCOL_SPECIAL_FLAG | (HANGUL_SYLLABLE_TAG << 24) }, //0 HANGUL_SYLLABLE_TAG,/* AC00-D7AF*/
|
||||
{0xD800, 0xDC00, UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG << 24) }, //1 LEAD_SURROGATE_TAG, /* D800-DBFF*/
|
||||
{0xDC00, 0xE000, UCOL_SPECIAL_FLAG | (TRAIL_SURROGATE_TAG << 24) }, //2 TRAIL_SURROGATE DC00-DFFF
|
||||
{0x3400, 0x4DB6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //3 CJK_IMPLICIT_TAG, /* 0x3400-0x4DB5*/
|
||||
{0x4E00, 0x9FA6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //4 CJK_IMPLICIT_TAG, /* 0x4E00-0x9FA5*/
|
||||
{0xF900, 0xFA2E, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //5 CJK_IMPLICIT_TAG, /* 0xF900-0xFA2D*/
|
||||
// Now directly handled in the collation code by the swapCJK function.
|
||||
//{0x3400, 0x4DB6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //3 CJK_IMPLICIT_TAG, /* 0x3400-0x4DB5*/
|
||||
//{0x4E00, 0x9FA6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //4 CJK_IMPLICIT_TAG, /* 0x4E00-0x9FA5*/
|
||||
//{0xF900, 0xFA2E, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //5 CJK_IMPLICIT_TAG, /* 0xF900-0xFA2D*/
|
||||
//{0x20000, 0x2A6D7, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //6 CJK_IMPLICIT_TAG, /* 0x20000-0x2A6D6*/
|
||||
//{0x2F800, 0x2FA1E, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //7 CJK_IMPLICIT_TAG, /* 0x2F800-0x2FA1D*/
|
||||
};
|
||||
|
@ -656,13 +709,15 @@ struct {
|
|||
exit(*status);
|
||||
}
|
||||
|
||||
element = readAnElement(data, status);
|
||||
element = readAnElement(data, t, &consts, status);
|
||||
line++;
|
||||
if(element != NULL) {
|
||||
// we have read the line, now do something sensible with the read data!
|
||||
if(element->variableTop == TRUE && variableTopValue == 0) {
|
||||
t->options->variableTopValue = element->cPoints[0];
|
||||
}
|
||||
|
||||
// Below stuff was taken care of in readAnElement
|
||||
//if(element->variableTop == TRUE && variableTopValue == 0) {
|
||||
// t->options->variableTopValue = element->cPoints[0];
|
||||
//}
|
||||
|
||||
// if element is a contraction, we want to add it to contractions
|
||||
if(element->cSize > 1) { // this is a contraction
|
||||
|
@ -698,6 +753,13 @@ struct {
|
|||
fprintf(stdout, "Expansions size: %i\n", t->expansions->position);
|
||||
}
|
||||
|
||||
|
||||
/* produce canonical closure for table */
|
||||
/* first set up constants for implicit calculation */
|
||||
uprv_uca_initImplicitConstants(consts.UCA_PRIMARY_IMPLICIT_MIN);
|
||||
/* do the closure */
|
||||
uprv_uca_canonicalClosure(t, status);
|
||||
|
||||
/* test */
|
||||
UCATableHeader *myData = uprv_uca_assembleTable(t, status);
|
||||
|
||||
|
@ -709,7 +771,7 @@ struct {
|
|||
fprintf(stdout, "Expansions size: %i\n", t->expansions->position);
|
||||
}
|
||||
|
||||
writeOutData(myData, contractionCEs, noOfContractions, outputDir, copyright, status);
|
||||
writeOutData(myData, &consts, contractionCEs, noOfContractions, outputDir, copyright, status);
|
||||
|
||||
InverseTableHeader *inverse = assembleInverseTable(status);
|
||||
writeOutInverseData(inverse, outputDir, copyright, status);
|
||||
|
|
|
@ -37,7 +37,7 @@ void deleteElement(void *element);
|
|||
int32_t readElement(char **from, char *to, char separator, UErrorCode *status);
|
||||
uint32_t getSingleCEValue(char *primary, char *secondary, char *tertiary, UBool caseBit, UErrorCode *status);
|
||||
void printOutTable(UCATableHeader *myData, UErrorCode *status);
|
||||
UCAElements *readAnElement(FILE *data, UErrorCode *status);
|
||||
UCAElements *readAnElement(FILE *data, tempUCATable *t, UCAConstants *consts, UErrorCode *status);
|
||||
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Add table
Reference in a new issue