ICU-4220 Use new process type to strip away comments,

and use the process rb type to explicitly declare special internal type handling.

X-SVN-Rev: 17290
This commit is contained in:
George Rhoten 2005-03-08 07:07:22 +00:00
parent e07f0299f0
commit 10cdd69133
3 changed files with 422 additions and 303 deletions

View file

@ -10,7 +10,7 @@
* ICU <specials> source: ../../../locale/icu/collation/root.xml
*/
root{
"%%UCARULES":include{"../unidata/UCARules.txt"}
"%%UCARULES":process(uca_rules){"../unidata/UCARules.txt"}
Version{"1.2"}
collations{
default{"standard"}

View file

@ -52,85 +52,85 @@ root{
Fullwidth-Halfwidth {
file {
resource:include{"Fullwidth_Halfwidth.txt"}
resource:process(transliterator){"Fullwidth_Halfwidth.txt"}
direction{"FORWARD"}
}
}
Halfwidth-Fullwidth {
file {
resource:include{"Fullwidth_Halfwidth.txt"}
resource:process(transliterator){"Fullwidth_Halfwidth.txt"}
direction{"REVERSE"}
}
}
Latin-Cyrillic {
file {
resource:include{"Cyrillic_Latin.txt"}
resource:process(transliterator){"Cyrillic_Latin.txt"}
direction{"REVERSE"}
}
}
Cyrillic-Latin {
file {
resource:include{"Cyrillic_Latin.txt"}
resource:process(transliterator){"Cyrillic_Latin.txt"}
direction{"FORWARD"}
}
}
Latin-Hebrew {
file {
resource:include{"Hebrew_Latin.txt"}
resource:process(transliterator){"Hebrew_Latin.txt"}
direction{"REVERSE"}
}
}
Hebrew-Latin {
file {
resource:include{"Hebrew_Latin.txt"}
resource:process(transliterator){"Hebrew_Latin.txt"}
direction{"FORWARD"}
}
}
Latin-Arabic {
file {
resource:include{"Arabic_Latin.txt"}
resource:process(transliterator){"Arabic_Latin.txt"}
direction{"REVERSE"}
}
}
Arabic-Latin {
file {
resource:include{"Arabic_Latin.txt"}
resource:process(transliterator){"Arabic_Latin.txt"}
direction{"FORWARD"}
}
}
Tone-Digit {
internal {
resource:include{"Tone_Digit.txt"}
resource:process(transliterator){"Tone_Digit.txt"}
direction{"FORWARD"}
}
}
Digit-Tone {
internal {
resource:include{"Tone_Digit.txt"}
resource:process(transliterator){"Tone_Digit.txt"}
direction{"REVERSE"}
}
}
Latin-NumericPinyin {
file {
resource:include{"Latin_NumericPinyin.txt"}
resource:process(transliterator){"Latin_NumericPinyin.txt"}
direction{"FORWARD"}
}
}
NumericPinyin-Latin {
file {
resource:include{"Latin_NumericPinyin.txt"}
resource:process(transliterator){"Latin_NumericPinyin.txt"}
direction{"REVERSE"}
}
}
Han-Spacedhan {
internal {
resource:include{"Han_Spacedhan.txt"}
resource:process(transliterator){"Han_Spacedhan.txt"}
direction{"FORWARD"}
}
}
@ -140,7 +140,7 @@ root{
Han-Latin {
file {
resource:include{"Han_Latin.txt"}
resource:process(transliterator){"Han_Latin.txt"}
direction{"FORWARD"}
}
}
@ -158,52 +158,52 @@ root{
Latin-Greek {
file {
resource:include{"Greek_Latin.txt"}
resource:process(transliterator){"Greek_Latin.txt"}
direction{"REVERSE"}
}
}
Greek-Latin {
file {
resource:include{"Greek_Latin.txt"}
resource:process(transliterator){"Greek_Latin.txt"}
direction{"FORWARD"}
}
}
Latin-Greek/UNGEGN {
file {
resource:include{"Greek_Latin_UNGEGN.txt"}
resource:process(transliterator){"Greek_Latin_UNGEGN.txt"}
direction{"REVERSE"}
}
}
Greek-Latin/UNGEGN {
file {
resource:include{"Greek_Latin_UNGEGN.txt"}
resource:process(transliterator){"Greek_Latin_UNGEGN.txt"}
direction{"FORWARD"}
}
}
Latin-Katakana {
file {
resource:include{"Latin_Katakana.txt"}
resource:process(transliterator){"Latin_Katakana.txt"}
direction{"FORWARD"}
}
}
Katakana-Latin {
file {
resource:include{"Latin_Katakana.txt"}
resource:process(transliterator){"Latin_Katakana.txt"}
direction{"REVERSE"}
}
}
Latin-Hiragana {
file {
resource:include{"Hiragana_Latin.txt"}
resource:process(transliterator){"Hiragana_Latin.txt"}
direction{"REVERSE"}
}
}
Hiragana-Latin {
file {
resource:include{"Hiragana_Latin.txt"}
resource:process(transliterator){"Hiragana_Latin.txt"}
direction{"FORWARD"}
}
}
@ -212,33 +212,33 @@ root{
Thai-ThaiSemi {
internal {
resource:include{"Thai_ThaiSemi.txt"}
resource:process(transliterator){"Thai_ThaiSemi.txt"}
direction{"FORWARD"}
}
}
Thai-ThaiLogical {
internal {
resource:include{"Thai_ThaiLogical.txt"}
resource:process(transliterator){"Thai_ThaiLogical.txt"}
direction{"FORWARD"}
}
}
ThaiLogical-Thai {
internal {
resource:include{"Thai_ThaiLogical.txt"}
resource:process(transliterator){"Thai_ThaiLogical.txt"}
direction{"REVERSE"}
}
}
ThaiLogical-Latin {
internal {
resource:include{"ThaiLogical_Latin.txt"}
resource:process(transliterator){"ThaiLogical_Latin.txt"}
direction{"FORWARD"}
}
}
Latin-ThaiLogical {
internal {
resource:include{"ThaiLogical_Latin.txt"}
resource:process(transliterator){"ThaiLogical_Latin.txt"}
direction{"REVERSE"}
}
}
@ -258,39 +258,39 @@ root{
Hiragana-Katakana {
file {
resource:include{"Hiragana_Katakana.txt"}
resource:process(transliterator){"Hiragana_Katakana.txt"}
direction{"FORWARD"}
}
}
Katakana-Hiragana {
file {
resource:include{"Hiragana_Katakana.txt"}
resource:process(transliterator){"Hiragana_Katakana.txt"}
direction{"REVERSE"}
}
}
Any-Accents {
file {
resource:include{"Any_Accents.txt"}
resource:process(transliterator){"Any_Accents.txt"}
direction{"FORWARD"}
}
}
Accents-Any {
file {
resource:include{"Any_Accents.txt"}
resource:process(transliterator){"Any_Accents.txt"}
direction{"REVERSE"}
}
}
Any-Publishing {
file {
resource:include{"Any_Publishing.txt"}
resource:process(transliterator){"Any_Publishing.txt"}
direction{"FORWARD"}
}
}
Publishing-Any {
file {
resource:include{"Any_Publishing.txt"}
resource:process(transliterator){"Any_Publishing.txt"}
direction{"REVERSE"}
}
}
@ -301,13 +301,13 @@ root{
LowerLatin-Jamo {
internal {
resource:include{"Latin_Jamo.txt"}
resource:process(transliterator){"Latin_Jamo.txt"}
direction{"FORWARD"}
}
}
Jamo-LowerLatin {
internal {
resource:include{"Latin_Jamo.txt"}
resource:process(transliterator){"Latin_Jamo.txt"}
direction{"REVERSE"}
}
}
@ -327,122 +327,122 @@ root{
// Inter-Indic composed rules
Latin-InterIndic {
internal {
resource:include{"Latin_InterIndic.txt"}
resource:process(transliterator){"Latin_InterIndic.txt"}
direction{"FORWARD"}
}
}
Devanagari-InterIndic {
internal {
resource:include{"Devanagari_InterIndic.txt"}
resource:process(transliterator){"Devanagari_InterIndic.txt"}
direction{"FORWARD"}
}
}
Bengali-InterIndic {
internal {
resource:include{"Bengali_InterIndic.txt"}
resource:process(transliterator){"Bengali_InterIndic.txt"}
direction{"FORWARD"}
}
}
Gurmukhi-InterIndic {
internal {
resource:include{"Gurmukhi_InterIndic.txt"}
resource:process(transliterator){"Gurmukhi_InterIndic.txt"}
direction{"FORWARD"}
}
}
Gujarati-InterIndic {
internal {
resource:include{"Gujarati_InterIndic.txt"}
resource:process(transliterator){"Gujarati_InterIndic.txt"}
direction{"FORWARD"}
}
}
Oriya-InterIndic {
internal {
resource:include{"Oriya_InterIndic.txt"}
resource:process(transliterator){"Oriya_InterIndic.txt"}
direction{"FORWARD"}
}
}
Tamil-InterIndic {
internal {
resource:include{"Tamil_InterIndic.txt"}
resource:process(transliterator){"Tamil_InterIndic.txt"}
direction{"FORWARD"}
}
}
Telugu-InterIndic {
internal {
resource:include{"Telugu_InterIndic.txt"}
resource:process(transliterator){"Telugu_InterIndic.txt"}
direction{"FORWARD"}
}
}
Kannada-InterIndic {
internal {
resource:include{"Kannada_InterIndic.txt"}
resource:process(transliterator){"Kannada_InterIndic.txt"}
direction{"FORWARD"}
}
}
Malayalam-InterIndic {
internal {
resource:include{"Malayalam_InterIndic.txt"}
resource:process(transliterator){"Malayalam_InterIndic.txt"}
direction{"FORWARD"}
}
}
InterIndic-Latin {
internal {
resource:include{"InterIndic_Latin.txt"}
resource:process(transliterator){"InterIndic_Latin.txt"}
direction{"FORWARD"}
}
}
InterIndic-Devanagari {
internal {
resource:include{"InterIndic_Devanagari.txt"}
resource:process(transliterator){"InterIndic_Devanagari.txt"}
direction{"FORWARD"}
}
}
InterIndic-Bengali {
internal {
resource:include{"InterIndic_Bengali.txt"}
resource:process(transliterator){"InterIndic_Bengali.txt"}
direction{"FORWARD"}
}
}
InterIndic-Gurmukhi {
internal {
resource:include{"InterIndic_Gurmukhi.txt"}
resource:process(transliterator){"InterIndic_Gurmukhi.txt"}
direction{"FORWARD"}
}
}
InterIndic-Gujarati {
internal {
resource:include{"InterIndic_Gujarati.txt"}
resource:process(transliterator){"InterIndic_Gujarati.txt"}
direction{"FORWARD"}
}
}
InterIndic-Oriya {
internal {
resource:include{"InterIndic_Oriya.txt"}
resource:process(transliterator){"InterIndic_Oriya.txt"}
direction{"FORWARD"}
}
}
InterIndic-Tamil {
internal {
resource:include{"InterIndic_Tamil.txt"}
resource:process(transliterator){"InterIndic_Tamil.txt"}
direction{"FORWARD"}
}
}
InterIndic-Telugu {
internal {
resource:include{"InterIndic_Telugu.txt"}
resource:process(transliterator){"InterIndic_Telugu.txt"}
direction{"FORWARD"}
}
}
InterIndic-Kannada {
internal {
resource:include{"InterIndic_Kannada.txt"}
resource:process(transliterator){"InterIndic_Kannada.txt"}
direction{"FORWARD"}
}
}
InterIndic-Malayalam {
internal {
resource:include{"InterIndic_Malayalam.txt"}
resource:process(transliterator){"InterIndic_Malayalam.txt"}
direction{"FORWARD"}
}
}
@ -733,25 +733,25 @@ root{
# Comment these out; they are only for testing
Latin-Han/definition{
file {
resource:include{"Han_Latin_Definition.txt"}
resource:process(transliterator){"Han_Latin_Definition.txt"}
direction{"REVERSE"}
}
}
Han-Latin/definition{
file {
resource:include{"Han_Latin_Definition.txt"}
resource:process(transliterator){"Han_Latin_Definition.txt"}
direction{"FORWARD"}
}
}
Latin-Han/EDICT{
file {
resource:include{"Han_Latin_EDICT.txt"}
resource:process(transliterator){"Han_Latin_EDICT.txt"}
direction{"REVERSE"}
}
}
Han-Latin/EDICT{
file {
resource:include{"Han_Latin_EDICT.txt"}
resource:process(transliterator){"Han_Latin_EDICT.txt"}
direction{"FORWARD"}
}
}

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 1998-2004, International Business Machines
* Copyright (C) 1998-2005, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -28,6 +28,7 @@
#include "read.h"
#include "ustr.h"
#include "reslist.h"
#include "rbt_pars.h"
#include "unicode/ustring.h"
#include "unicode/putil.h"
#include <stdio.h>
@ -35,7 +36,6 @@
/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD 3
#define U_ICU_UNIDATA "unidata"
#define CR 0x000D
#define LF 0x000A
#define SPACE 0x0020
@ -43,52 +43,14 @@
#define ESCAPE 0x005C
#define HASH 0x0023
#define QUOTE 0x0027
#define ZERO 0x0030
#define STARTCOMMAND 0x005B
#define ENDCOMMAND 0x005D
#define OPENSQBRACKET 0x005B
#define CLOSESQBRACKET 0x005D
U_STRING_DECL(k_type_string, "string", 6);
U_STRING_DECL(k_type_binary, "binary", 6);
U_STRING_DECL(k_type_bin, "bin", 3);
U_STRING_DECL(k_type_table, "table", 5);
U_STRING_DECL(k_type_int, "int", 3);
U_STRING_DECL(k_type_integer, "integer", 7);
U_STRING_DECL(k_type_array, "array", 5);
U_STRING_DECL(k_type_alias, "alias", 5);
U_STRING_DECL(k_type_intvector, "intvector", 9);
U_STRING_DECL(k_type_import, "import", 6);
U_STRING_DECL(k_type_include, "include", 7);
U_STRING_DECL(k_type_reserved, "reserved", 8);
enum EResourceType
{
RT_UNKNOWN,
RT_STRING,
RT_BINARY,
RT_TABLE,
RT_INTEGER,
RT_ARRAY,
RT_ALIAS,
RT_INTVECTOR,
RT_IMPORT,
RT_INCLUDE,
RT_RESERVED
};
/* only used for debugging */
const char *resourceNames[] =
{
"Unknown",
"String",
"Binary",
"Table",
"Integer",
"Array",
"Alias",
"Int vector",
"Import",
"Include",
"Reserved",
};
typedef struct SResource *
ParseResourceFunction(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
struct Lookahead
{
@ -126,27 +88,9 @@ static UBool gMakeBinaryCollation = TRUE;
static struct SResource *parseResource(char *tag, const struct UString *comment, UErrorCode *status);
void initParser(UBool makeBinaryCollation)
{
uint32_t i;
U_STRING_INIT(k_type_string, "string", 6);
U_STRING_INIT(k_type_binary, "binary", 6);
U_STRING_INIT(k_type_bin, "bin", 3);
U_STRING_INIT(k_type_table, "table", 5);
U_STRING_INIT(k_type_int, "int", 3);
U_STRING_INIT(k_type_integer, "integer", 7);
U_STRING_INIT(k_type_array, "array", 5);
U_STRING_INIT(k_type_alias, "alias", 5);
U_STRING_INIT(k_type_intvector, "intvector", 9);
U_STRING_INIT(k_type_import, "import", 6);
U_STRING_INIT(k_type_reserved, "reserved", 8);
U_STRING_INIT(k_type_include, "include", 7);
for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
{
ustr_init(&lookahead[i].value);
}
gMakeBinaryCollation = makeBinaryCollation;
static UChar toHexDigit(uint8_t ch) {
static const char gHexChars[] = "0123456789ABCDEF";
return gHexChars[ch];
}
/* The nature of the lookahead buffer:
@ -317,64 +261,11 @@ static char *getInvariantString(uint32_t *line, struct UString *comment, UErrorC
return result;
}
static enum EResourceType
parseResourceType(UErrorCode *status)
{
struct UString *tokenValue;
struct UString comment;
enum EResourceType result = RT_UNKNOWN;
uint32_t line=0;
ustr_init(&comment);
expect(TOK_STRING, &tokenValue, &comment, &line, status);
if (U_FAILURE(*status))
{
return RT_UNKNOWN;
}
*status = U_ZERO_ERROR;
if (u_strcmp(tokenValue->fChars, k_type_string) == 0) {
result = RT_STRING;
} else if (u_strcmp(tokenValue->fChars, k_type_array) == 0) {
result = RT_ARRAY;
} else if (u_strcmp(tokenValue->fChars, k_type_alias) == 0) {
result = RT_ALIAS;
} else if (u_strcmp(tokenValue->fChars, k_type_table) == 0) {
result = RT_TABLE;
} else if (u_strcmp(tokenValue->fChars, k_type_binary) == 0) {
result = RT_BINARY;
} else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
result = RT_BINARY;
} else if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
result = RT_INTEGER;
} else if (u_strcmp(tokenValue->fChars, k_type_integer) == 0) {
result = RT_INTEGER;
} else if (u_strcmp(tokenValue->fChars, k_type_intvector) == 0) {
result = RT_INTVECTOR;
} else if (u_strcmp(tokenValue->fChars, k_type_import) == 0) {
result = RT_IMPORT;
} else if (u_strcmp(tokenValue->fChars, k_type_include) == 0) {
result = RT_INCLUDE;
} else if (u_strcmp(tokenValue->fChars, k_type_reserved) == 0) {
result = RT_RESERVED;
} else {
char tokenBuffer[1024];
u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
*status = U_INVALID_FORMAT_ERROR;
error(line, "unknown resource type '%s'", tokenBuffer);
}
return result;
}
static struct SResource *
parseUCARules(char *tag, uint32_t startline, UErrorCode *status)
parseUCARules(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
{
struct SResource *result = NULL;
struct UString *tokenValue;
struct UString comment;
FileStream *file = NULL;
char filename[256] = { '\0' };
char cs[128] = { '\0' };
@ -389,8 +280,7 @@ parseUCARules(char *tag, uint32_t startline, UErrorCode *status)
UChar *targetLimit = NULL;
int32_t size = 0;
ustr_init(&comment);
expect(TOK_STRING, &tokenValue, &comment, &line, status);
expect(TOK_STRING, &tokenValue, NULL, &line, status);
if(isVerbose()){
printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
@ -419,10 +309,6 @@ parseUCARules(char *tag, uint32_t startline, UErrorCode *status)
{
return NULL;
}
uprv_strcat(filename,"..");
uprv_strcat(filename,U_FILE_SEP_STRING);
uprv_strcat(filename, U_ICU_UNIDATA);
uprv_strcat(filename, U_FILE_SEP_STRING);
uprv_strcat(filename, cs);
@ -437,7 +323,7 @@ parseUCARules(char *tag, uint32_t startline, UErrorCode *status)
* since the actual size needed for storing UChars
* is not known in UTF-8 byte stream
*/
size = ucbuf_size(ucbuf);
size = ucbuf_size(ucbuf) + 1;
pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
target = pTarget;
@ -448,7 +334,7 @@ parseUCARules(char *tag, uint32_t startline, UErrorCode *status)
{
c = ucbuf_getc(ucbuf, status);
if(c == QUOTE) {
quoted = (UBool)!quoted;
quoted = (UBool)!quoted;
}
/* weiv (06/26/2002): adding the following:
* - preserving spaces in commands [...]
@ -456,21 +342,23 @@ parseUCARules(char *tag, uint32_t startline, UErrorCode *status)
*/
if (c == STARTCOMMAND && !quoted)
{
/* preserve commands
* closing bracket will be handled by the
* append at the end of the loop
*/
while(c != ENDCOMMAND) {
U_APPEND_CHAR32(c, target,len);
c = ucbuf_getc(ucbuf, status);
}
} else if (c == HASH && !quoted) {
/* skip comments */
while(c != CR && c != LF) {
c = ucbuf_getc(ucbuf, status);
}
continue;
} else if (c == ESCAPE)
/* preserve commands
* closing bracket will be handled by the
* append at the end of the loop
*/
while(c != ENDCOMMAND) {
U_APPEND_CHAR32(c, target,len);
c = ucbuf_getc(ucbuf, status);
}
}
else if (c == HASH && !quoted) {
/* skip comments */
while(c != CR && c != LF) {
c = ucbuf_getc(ucbuf, status);
}
continue;
}
else if (c == ESCAPE)
{
c = unescape(ucbuf, status);
@ -483,8 +371,8 @@ parseUCARules(char *tag, uint32_t startline, UErrorCode *status)
}
else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
{
/* ignore spaces carriage returns
* and line feed unless in the form \uXXXX
/* ignore spaces carriage returns
* and line feed unless in the form \uXXXX
*/
continue;
}
@ -515,16 +403,94 @@ parseUCARules(char *tag, uint32_t startline, UErrorCode *status)
return result;
}
static struct SResource *
parseTransliterator(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
{
struct SResource *result = NULL;
struct UString *tokenValue;
FileStream *file = NULL;
char filename[256] = { '\0' };
char cs[128] = { '\0' };
uint32_t line;
int len=0;
UBool quoted = FALSE;
UCHARBUF *ucbuf=NULL;
UChar32 c = 0;
const char* cp = NULL;
UChar *pTarget = NULL;
const UChar *pSource = NULL;
int32_t size = 0;
expect(TOK_STRING, &tokenValue, NULL, &line, status);
if(isVerbose()){
printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
if (U_FAILURE(*status))
{
return NULL;
}
/* make the filename including the directory */
if (inputdir != NULL)
{
uprv_strcat(filename, inputdir);
if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
{
uprv_strcat(filename, U_FILE_SEP_STRING);
}
}
u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
if (U_FAILURE(*status))
{
return NULL;
}
uprv_strcat(filename, cs);
ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
if (U_FAILURE(*status)) {
error(line, "An error occured while opening the input file %s\n", filename);
return NULL;
}
/* We allocate more space than actually required
* since the actual size needed for storing UChars
* is not known in UTF-8 byte stream
*/
size = ucbuf_size(ucbuf) + 1;
pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
pSource = ucbuf_getBuffer(ucbuf, &size, status);
size = utrans_stripRules(pSource, size, pTarget, status);
result = string_open(bundle, tag, pTarget, size, NULL, status);
ucbuf_close(ucbuf);
uprv_free(pTarget);
T_FileStream_close(file);
return result;
}
static struct SResource *
parseString(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
{
struct UString *tokenValue;
struct SResource *result = NULL;
if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
/* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
{
return parseUCARules(tag, startline, status);
}
}*/
if(isVerbose()){
printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
@ -779,99 +745,99 @@ parseCollationElements(char *tag, uint32_t startline, UBool newCollation, UError
printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
if(!newCollation) {
return addCollation(result, startline, status);
} else {
for(;;) {
ustr_init(&comment);
token = getToken(&tokenValue, &comment, &line, status);
return addCollation(result, startline, status);
}
else {
for(;;) {
ustr_init(&comment);
token = getToken(&tokenValue, &comment, &line, status);
if (token == TOK_CLOSE_BRACE)
{
return result;
}
if (token != TOK_STRING)
{
table_close(result, status);
*status = U_INVALID_FORMAT_ERROR;
if (token == TOK_EOF)
if (token == TOK_CLOSE_BRACE)
{
error(startline, "unterminated table");
return result;
}
if (token != TOK_STRING)
{
table_close(result, status);
*status = U_INVALID_FORMAT_ERROR;
if (token == TOK_EOF)
{
error(startline, "unterminated table");
}
else
{
error(line, "Unexpected token %s", tokenNames[token]);
}
return NULL;
}
u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
if (U_FAILURE(*status))
{
table_close(result, status);
return NULL;
}
if (uprv_strcmp(subtag, "default") == 0)
{
member = parseResource(subtag, NULL, status);
if (U_FAILURE(*status))
{
table_close(result, status);
return NULL;
}
table_add(result, member, line, status);
}
else
{
error(line, "Unexpected token %s", tokenNames[token]);
token = peekToken(0, &tokenValue, &line, &comment, status);
/* this probably needs to be refactored or recursively use the parser */
/* first we assume that our collation table won't have the explicit type */
/* then, we cannot handle aliases */
if(token == TOK_OPEN_BRACE) {
token = getToken(&tokenValue, &comment, &line, status);
collationRes = table_open(bundle, subtag, NULL, status);
table_add(result, addCollation(collationRes, startline, status), startline, status);
} else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
/* we could have a table too */
token = peekToken(1, &tokenValue, &line, &comment, status);
u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
if(uprv_strcmp(typeKeyword, "alias") == 0) {
member = parseResource(subtag, NULL, status);
if (U_FAILURE(*status))
{
table_close(result, status);
return NULL;
}
table_add(result, member, line, status);
} else {
*status = U_INVALID_FORMAT_ERROR;
return NULL;
}
} else {
*status = U_INVALID_FORMAT_ERROR;
return NULL;
}
}
return NULL;
}
/*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
/*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
if (U_FAILURE(*status))
{
table_close(result, status);
return NULL;
}
if (uprv_strcmp(subtag, "default") == 0)
{
member = parseResource(subtag, NULL, status);
if (U_FAILURE(*status))
{
table_close(result, status);
return NULL;
}
table_add(result, member, line, status);
}
else
{
token = peekToken(0, &tokenValue, &line, &comment, status);
/* this probably needs to be refactored or recursively use the parser */
/* first we assume that our collation table won't have the explicit type */
/* then, we cannot handle aliases */
if(token == TOK_OPEN_BRACE) {
token = getToken(&tokenValue, &comment, &line, status);
collationRes = table_open(bundle, subtag, NULL, status);
table_add(result, addCollation(collationRes, startline, status), startline, status);
} else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
/* we could have a table too */
token = peekToken(1, &tokenValue, &line, &comment, status);
u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
if(uprv_strcmp(typeKeyword, "alias") == 0) {
member = parseResource(subtag, NULL, status);
if (U_FAILURE(*status))
{
table_close(result, status);
return NULL;
}
table_add(result, member, line, status);
} else {
*status = U_INVALID_FORMAT_ERROR;
return NULL;
if (U_FAILURE(*status))
{
table_close(result, status);
return NULL;
}
} else {
*status = U_INVALID_FORMAT_ERROR;
return NULL;
}
}
/*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
/*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
if (U_FAILURE(*status))
{
table_close(result, status);
return NULL;
}
}
}
}
@ -1506,11 +1472,172 @@ parseInclude(char *tag, uint32_t startline, const struct UString* comment, UErro
return result;
}
U_STRING_DECL(k_type_string, "string", 6);
U_STRING_DECL(k_type_binary, "binary", 6);
U_STRING_DECL(k_type_bin, "bin", 3);
U_STRING_DECL(k_type_table, "table", 5);
U_STRING_DECL(k_type_int, "int", 3);
U_STRING_DECL(k_type_integer, "integer", 7);
U_STRING_DECL(k_type_array, "array", 5);
U_STRING_DECL(k_type_alias, "alias", 5);
U_STRING_DECL(k_type_intvector, "intvector", 9);
U_STRING_DECL(k_type_import, "import", 6);
U_STRING_DECL(k_type_include, "include", 7);
U_STRING_DECL(k_type_reserved, "reserved", 8);
/* Various non-standard plugins that create one or more special resources. */
U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
enum EResourceType
{
RT_UNKNOWN,
RT_STRING,
RT_BINARY,
RT_TABLE,
RT_INTEGER,
RT_ARRAY,
RT_ALIAS,
RT_INTVECTOR,
RT_IMPORT,
RT_INCLUDE,
RT_PLUGIN_UCA_RULES,
RT_PLUGIN_COLLATION,
RT_PLUGIN_TRANSLITERATOR,
RT_RESERVED
};
static struct {
const char *name; /* only used for debugging */
ParseResourceFunction *parseFunction;
} gResourceTypes[] = {
{"Unknown", NULL},
{"string", parseString},
{"binary", parseBinary},
{"table", parseTable},
{"integer", parseInteger},
{"array", parseArray},
{"alias", parseAlias},
{"intvector", parseIntVector},
{"import", parseImport},
{"include", parseInclude},
{"plugin[uca_rules]", parseUCARules},
{"plugin[collation]", NULL},
{"plugin[transliterator]", parseTransliterator},
{"reserved", NULL}
};
void initParser(UBool makeBinaryCollation)
{
uint32_t i;
U_STRING_INIT(k_type_string, "string", 6);
U_STRING_INIT(k_type_binary, "binary", 6);
U_STRING_INIT(k_type_bin, "bin", 3);
U_STRING_INIT(k_type_table, "table", 5);
U_STRING_INIT(k_type_int, "int", 3);
U_STRING_INIT(k_type_integer, "integer", 7);
U_STRING_INIT(k_type_array, "array", 5);
U_STRING_INIT(k_type_alias, "alias", 5);
U_STRING_INIT(k_type_intvector, "intvector", 9);
U_STRING_INIT(k_type_import, "import", 6);
U_STRING_INIT(k_type_reserved, "reserved", 8);
U_STRING_INIT(k_type_include, "include", 7);
U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
{
ustr_init(&lookahead[i].value);
}
gMakeBinaryCollation = makeBinaryCollation;
}
static enum EResourceType
parseResourceType(UErrorCode *status)
{
struct UString *tokenValue;
struct UString comment;
enum EResourceType result = RT_UNKNOWN;
uint32_t line=0;
ustr_init(&comment);
expect(TOK_STRING, &tokenValue, &comment, &line, status);
if (U_FAILURE(*status))
{
return RT_UNKNOWN;
}
*status = U_ZERO_ERROR;
if (u_strcmp(tokenValue->fChars, k_type_string) == 0) {
result = RT_STRING;
}
else if (u_strcmp(tokenValue->fChars, k_type_array) == 0) {
result = RT_ARRAY;
}
else if (u_strcmp(tokenValue->fChars, k_type_alias) == 0) {
result = RT_ALIAS;
}
else if (u_strcmp(tokenValue->fChars, k_type_table) == 0) {
result = RT_TABLE;
}
else if (u_strcmp(tokenValue->fChars, k_type_binary) == 0) {
result = RT_BINARY;
}
else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
result = RT_BINARY;
}
else if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
result = RT_INTEGER;
}
else if (u_strcmp(tokenValue->fChars, k_type_integer) == 0) {
result = RT_INTEGER;
}
else if (u_strcmp(tokenValue->fChars, k_type_intvector) == 0) {
result = RT_INTVECTOR;
}
else if (u_strcmp(tokenValue->fChars, k_type_import) == 0) {
result = RT_IMPORT;
}
else if (u_strcmp(tokenValue->fChars, k_type_include) == 0) {
result = RT_INCLUDE;
}
else if (u_strcmp(tokenValue->fChars, k_type_plugin_uca_rules) == 0) {
result = RT_PLUGIN_UCA_RULES;
}
else if (u_strcmp(tokenValue->fChars, k_type_plugin_collation) == 0) {
result = RT_PLUGIN_COLLATION;
}
else if (u_strcmp(tokenValue->fChars, k_type_plugin_transliterator) == 0) {
result = RT_PLUGIN_TRANSLITERATOR;
}
else if (u_strcmp(tokenValue->fChars, k_type_reserved) == 0) {
result = RT_RESERVED;
}
else {
char tokenBuffer[1024];
u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
*status = U_INVALID_FORMAT_ERROR;
error(line, "unknown resource type '%s'", tokenBuffer);
}
return result;
}
static struct SResource *
parseResource(char *tag, const struct UString *comment, UErrorCode *status)
{
enum ETokenType token;
enum EResourceType resType = RT_UNKNOWN;
ParseResourceFunction *parseFunction = NULL;
struct UString *tokenValue;
uint32_t startline;
uint32_t line;
@ -1620,21 +1747,13 @@ parseResource(char *tag, const struct UString *comment, UErrorCode *status)
/* We should now know what we need to parse next, so call the appropriate parser
function and return. */
switch (resType)
{
case RT_STRING: return parseString (tag, startline, comment, status);
case RT_TABLE: return parseTable (tag, startline, comment, status);
case RT_ARRAY: return parseArray (tag, startline, comment, status);
case RT_ALIAS: return parseAlias (tag, startline, comment, status);
case RT_BINARY: return parseBinary (tag, startline, comment, status);
case RT_INTEGER: return parseInteger (tag, startline, comment, status);
case RT_IMPORT: return parseImport (tag, startline, comment, status);
case RT_INCLUDE: return parseInclude (tag, startline, comment, status);
case RT_INTVECTOR: return parseIntVector (tag, startline, comment, status);
default:
parseFunction = gResourceTypes[resType].parseFunction;
if (parseFunction != NULL) {
return parseFunction(tag, startline, comment, status);
}
else {
*status = U_INTERNAL_PROGRAM_ERROR;
error(startline, "internal error: unknown resource type found and not handled");
error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].name);
}
return NULL;